FFT字符串匹配
定义字符串下标从000,开始,有文本串AAA长度为nnn,模式串BBB长度为mmm,我们可以考虑一个函数f(x,y)=A(x)−B(y)f(x, y) = A(x) - B(y)f(x,y)=A(x)−B(y)。
我们设F(x)(x≥m−1)=∑i=0m−1f(x−m+1+i,i)F(x)(x \ge m - 1) = \sum\limits_{i = 0} ^{m - 1} f(x - m + 1 + i, i)F(x)(x≥m−1)=i=0∑m−1f(x−m+1+i,i),由定义显然可以得到如果F(x)=0F(x) = 0F(x)=0,则A[x−m+1,x]=BA[x - m + 1, x] = BA[x−m+1,x]=B也就是两个字符匹配上了,
但是考虑"ab","ba""ab", "ba""ab","ba"两个字符串,他们也是匹配的,我们稍微修改一下f(x,y)f(x, y)f(x,y)函数令其为:(A(x)−B(y))2(A(x) - B(y)) ^ 2(A(x)−B(y))2,这样这个函数就没有问题了。
我们考虑翻转一下BBB串,令其为SSS,则有B(i)=S(m−i−1)B(i) = S(m - i - 1)B(i)=S(m−i−1),
F(x)=∑i=0m−1(A(x−m+1+i)−S(m−i−1))2F(x)=∑i=0m−1S(m−i−1)2+∑i=0m−1A(x−m+1+i)2−2×∑i=0m−1A(x−m+1+i)S(m−i−1)F(x) = \sum\limits_{i = 0} ^{m - 1} \left(A(x - m + 1 + i) - S(m - i - 1)\right) ^ 2\\ F(x) = \sum_{i = 0} ^{m - 1} S(m - i - 1) ^ 2 + \sum_{i = 0} ^{m - 1} A(x - m + 1 + i) ^ 2 - 2 \times \sum_{i = 0} ^{m - 1} A(x - m + 1 + i) S(m - i - 1)\\ F(x)=i=0∑m−1(A(x−m+1+i)−S(m−i−1))2F(x)=i=0∑m−1S(m−i−1)2+i=0∑m−1A(x−m+1+i)2−2×i=0∑m−1A(x−m+1+i)S(m−i−1)
第一项是一个定值,第二可以O(n)O(n)O(n)预处理,然后前缀和O(1)O(1)O(1)得到,第三项不难发现是一个卷积的形式,所以可以通过FFTFFTFFT得到,整体复杂度O(nlogn)O(n \log n)O(nlogn)。
以上我们已经可以解决当模式串的字符串匹配了,尽管复杂度不如KMPKMPKMP优秀,但是我们考虑一个缺项字符串匹配:
a*b
aebr*ob
我们考虑重新设计f(x,y)f(x, y)f(x,y)函数,定义f(x,y)=(A(x)−B(y))2A(x)B(y)f(x, y) = (A(x) - B(y)) ^ 2 A(x) B(y)f(x,y)=(A(x)−B(y))2A(x)B(y),同样的考虑翻转BBB串,有B(i)=S(m−1−i)B(i) = S(m - 1 - i)B(i)=S(m−1−i)。
F(x)=∑i=0m−1(A(x−m+1+i)−S(m−1−i))2A(x−m+1+i)S(m−1−i)∑i=0m−1A(x−m+1+i)S(m−1−i)3+∑i=0m−1A(x−m+1+i)3S(m−1−i)−2×∑i=0m−1A(x−m+1+i)2S(m−1−i)2F(x) = \sum_{i = 0} ^{m - 1} (A(x - m + 1 + i) - S(m - 1 - i)) ^ 2 A(x - m + 1 + i) S(m - 1 - i)\\ \sum_{i = 0} ^{m - 1}A(x - m + 1 + i) S(m - 1 - i) ^ 3 + \sum_{i = 0} ^{m - 1} A(x - m + 1 + i) ^ 3 S(m - 1 - i) - 2 \times \sum_{i = 0} ^{m - 1} A(x - m + 1 + i) ^ 2 S(m - 1 - i) ^ 2\\ F(x)=i=0∑m−1(A(x−m+1+i)−S(m−1−i))2A(x−m+1+i)S(m−1−i)i=0∑m−1A(x−m+1+i)S(m−1−i)3+i=0∑m−1A(x−m+1+i)3S(m−1−i)−2×i=0∑m−1A(x−m+1+i)2S(m−1−i)2
容易发现这里是三个多项式相加的形式,所以只要做三次FFTFFTFFT即可得到答案,放一个模板题。
#include <bits/stdc++.h>using namespace std;struct Complex {double r, i;Complex(double _r = 0, double _i = 0) : r(_r), i(_i) {}
};Complex operator + (const Complex &a, const Complex &b) {return Complex(a.r + b.r, a.i + b.i);
}Complex operator - (const Complex &a, const Complex &b) {return Complex(a.r - b.r, a.i - b.i);
}Complex operator * (const Complex &a, const Complex &b) {return Complex(a.r * b.r - a.i * b.i, a.r * b.i + a.i * b.r);
}Complex operator / (const Complex &a, const Complex &b) {return Complex((a.r * b.r + a.i * b.i) / (b.r * b.r + b.i * b.i), (a.i * b.r - a.r * b.i) / (b.r * b.r + b.i * b.i));
}Complex operator * (const Complex &a, const double &b) {return Complex(a.r * b, a.i * b);
}const int N = 2e6 + 10;int r[N];void get_r(int lim) {for (int i = 0; i < lim; i++) {r[i] = (i & 1) * (lim >> 1) + (r[i >> 1] >> 1);}
}void FFT(Complex *f, int lim, int rev) {for (int i = 0; i < lim; i++) {if (i < r[i]) {swap(f[i], f[r[i]]);}}const double pi = acos(-1.0);for (int mid = 1; mid < lim; mid <<= 1) {Complex wn = Complex(cos(pi / mid), rev * sin(pi / mid));for (int len = mid << 1, cur = 0; cur < lim; cur += len) {Complex w = Complex(1, 0);for (int k = 0; k < mid; k++, w = w * wn) {Complex x = f[cur + k], y = w * f[cur + mid + k];f[cur + k] = x + y, f[cur + mid + k] = x - y;}}}if (rev == -1) {for (int i = 0; i < lim; i++) {f[i].r /= lim;}}
}// const int N = 1e6 + 10;Complex a[N], b[N], c[N];char str1[N], str2[N];int A[N], S[N], n, m, lim;int main() {// freopen("in.txt", "r", stdin);// freopen("out.txt", "w", stdout);scanf("%d %d %s %s", &m, &n, str2, str1);for (int i = 0; i < n; i++) {A[i] = str1[i] == '*' ? 0 : str1[i] - 'a' + 1;}for (int i = 0; i < m; i++) {S[i] = str2[m - i - 1] == '*' ? 0 : str2[m - i - 1] - 'a' + 1;}lim = 1;while (lim < n + m) {lim <<= 1;}get_r(lim);for (int i = 0; i < lim; i++) {b[i] = Complex(A[i], 0);c[i] = Complex(S[i] * S[i] * S[i], 0);}FFT(b, lim, 1), FFT(c, lim, 1);for (int i = 0; i < lim; i++) {a[i] = a[i] + b[i] * c[i];}for (int i = 0; i < lim; i++) {b[i] = Complex(A[i] * A[i] * A[i], 0);c[i] = Complex(S[i], 0);}FFT(b, lim, 1), FFT(c, lim, 1);for (int i = 0; i < lim; i++) {a[i] = a[i] + b[i] * c[i];}for (int i = 0; i < lim; i++) {b[i] = Complex(A[i] * A[i], 0);c[i] = Complex(S[i] * S[i], 0);}FFT(b, lim, 1), FFT(c, lim, 1);for (int i = 0; i < lim; i++) {a[i] = a[i] - 2 * b[i] * c[i];}FFT(a, lim, -1);vector<int> ans;for (int i = m - 1; i < n; i++) {if ((long long)(a[i].r + 0.5) == 0) {ans.push_back(i - m + 2);}}printf("%d\n", ans.size());for (auto it : ans) {printf("%d ", it);}puts("");return 0;
}