New Distinct Substrings(后缀数组)
给定一个字符串,求其中互不相同的子串的个数。字符串长度 \(n \le 50005\)。
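显然,任何一个子串一定是某个后缀的前缀。先(按套路)把后缀排好序,对于排名为\(i\)的后缀(起始位置为\(sa[i]\),下标从0开始),它共有\(n-sa[i]\)个前缀。其中,有\(height[i]\)个前缀已经在排名比它小的后缀中出现过(\(height[i]\)是排名为\(i\)的后缀与排名为\(i-1\)的后缀的最长公共前缀长度,\(height[0]=0\)),因此它对答案的贡献是\(n-sa[i]-height[i]\),最终答案为\(\sum_{i=0}^{n-1}\left(n-sa[i]-height[i]\right)\)。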
#include <cstdio>
#include <cstring>
using namespace std;

// Capacity of every buffer below.
const int maxn = 50005;

int T;         // number of test cases, read by main()
int n;         // length of the string currently being processed
int m = maxn;  // number of counting-sort buckets used by Suffixsort()
char s[maxn];  // input string (NUL-terminated)

// Returns true when positions a and b carry the same pair of keys
// (r[a], r[a + j]) and (r[b], r[b + j]), i.e. when the two suffixes cannot be
// told apart by their first 2 * j characters.
//
//   r    key array indexed by suffix start position
//   a,b  suffix start positions, both in [0, n)
//   j    distance between the first and the second key
//
// A second key that would lie at or beyond position n is treated as 0 instead
// of being read from r.  The key buffers are reused between test cases, so the
// slots from index n onwards may still hold keys of an earlier, longer string;
// reading them could make a suffix of length j look equal to a longer one.
// Suffixsort() only stores non-zero keys while ranking (character codes in the
// first round, ranks starting at 1 afterwards), so 0 never collides with a
// real key.
bool cmp(int *r, int a, int b, int j) {
    if (r[a] != r[b]) return false;
    const int second_a = (a + j < n) ? r[a + j] : 0;
    const int second_b = (b + j < n) ? r[b + j] : 0;
    return second_a == second_b;
}

// Work areas of Suffixsort().
int *x, *y, *t;  // current keys, scratch/previous keys, swap temporary
int wa[maxn];    // key buffer (x and y point into wa / wb)
int wb[maxn];    // key buffer
int ws[maxn];    // counting-sort bucket counters
int sa[maxn];    // suffix array: sa[k] = start of the k-th smallest suffix
int wv[maxn];    // first keys listed in second-key order
int ht[maxn];    // ht[k] = LCP of the suffixes sa[k - 1] and sa[k]
void Suffixsort(char *r){int i, j, p=0; x=wa, y=wb; m=maxn;for (i=0; i<m; ++i) ws[i]=0;for (i=0; i<n; ++i) ++ws[x[i]=r[i]];for (i=1; i<m; ++i) ws[i]+=ws[i-1];for (i=0; i<n; ++i) sa[--ws[r[i]]]=i;for (j=1; p<n&&j<n; j<<=1, m=p+1){for (p=0, i=n-j; i<n; ++i) y[p++]=i;for (i=0; i<n; ++i) if (sa[i]>=j) y[p++]=sa[i]-j;for (i=0; i<n; ++i) wv[i]=x[y[i]];for (i=0; i<m; ++i) ws[i]=0;for (i=0; i<n; ++i) ++ws[x[i]];for (i=1; i<m; ++i) ws[i]+=ws[i-1];for (i=n-1; i>0; --i) sa[--ws[wv[i]]]=y[i]; //这句话依然是背下来的t=x; x=y; y=t; x[sa[0]]=1;for (p=1, i=1; i<n; ++i) x[sa[i]]=cmp(y, sa[i-1], sa[i], j)?p:++p; //+1 }memset(ht, 0, sizeof(ht));for (int i=0; i<n; ++i) --x[i]; p=0;for (int i=0; i<n; ht[x[i++]]=p){if (!x[i]) continue;for (p?p--:0, j=sa[x[i]-1]; r[i+p]==r[j+p]&&i+p<n; ++p);} ht[0]=0;return;
}int main(){scanf("%d", &T); int ans;while (T--) {scanf("%s", s); n=strlen(s); Suffixsort(s); ans=0;//for (int i=0; i<n; ++i) printf("%d ", sa[i]); puts("");//for (int i=1; i<n; ++i) printf("%d ", ht[i]);for (int i=0; i<n; ++i) ans+=n-sa[i]-ht[i];printf("%d\n", ans);}return 0;
}