一种后缀数组的奇妙运用

最近参加了thucamp，其中day5的E题是一道有难度的字符串题。

大部分人都是用后缀自动机做，我也是这样感觉的，不过想了好一会儿也没有很清晰的思路，但是突然想到了一个用后缀数组建树的方法，我2h码了7kb的代码，但是居然能够一遍过，有点不可思议。

题目：
有一个长度为 $n(\le 2\times 10^5)$ 的字符串 $s$ ，对于每个 $i$ ，有 $p_i$ 的概率选出长度为 $i$ 的前缀，有 $1-p_i$ 的概率选出该前缀的反转，求所有选出的字符串构成的trie树的大小的期望值。

思路：令 $s_2$ 为 $s$ 反转，那么 $s_2$ 所有的后缀就是所有前缀 $i$ 的反转。

考虑 $s_2$ 所有后缀和 $s$ 构成的字典树，某个节点会不会出现取决于它的子树中是否存在被选择的 $s_2$ 后缀或 $s$ 的前缀。

特别地，如果存在某个 $i$ ，使得 $s_2$ 长度为 $i$ 的后缀和 $s$ 长度为 $i$ 的前缀都存在于 $x$ 的子树中，那么 $x$ 必然存在。

现在我们先不考虑 $s$ ，只考虑 $s_2$ 所有后缀形成的字典树。

然后我们把原来的字典树上的点按照子树中有哪些后缀分类，总共有不超过 $2 n$ 种，而且每一类必然是一直往上的一条链，我们完全可以把每一类以一个点表示，因此我们只需要建出归类以后的树即可。

于是我们先用后缀数组排序，求出 $height$ 数组，然后类似笛卡尔树一样合并，具体做法是把 $height$ 从大到小排序，然后不断合并。

于是我们就可以统计出没有 $s$ 时的贡献了。

然后就是考虑 $s$ 经过点的贡献。

我们先要确定s的路径，这只需要找到和s有最长公共前缀的 $s_2$ 后缀，然后在合并后的字典树往上跳即可。因为 $s_2$ 的后缀已经排序，所以我们只需要二分答案即可。

还有就是要找到所有必然存在的点，注意到单调性，因此我们仍然用二分来检查。

最后顺着路径统计一遍即可。

#include<bits/stdc++.h>
// Inclusive loop shorthands used throughout this file:
// rep counts i upwards from x to y, dwn counts i downwards from x to y.
#define rep(i,x,y) for(int i=x;i<=y;i++)
#define dwn(i,x,y) for(int i=x;i>=y;i--)
#define ll long long
using namespace std;

// Reads one decimal integer from stdin into x.
// Leading non-digit characters are skipped; if any of them is '-', the result
// is negated. The first character after the digits is consumed as well.
// The character is kept in an int so that EOF is recognised: at end of input
// both loops stop and x is left at 0 instead of spinning forever.
template<typename T>inline void qr(T &x){
    x=0;
    int f=0;
    int s=getchar();
    while(s!=EOF&&!isdigit(s)){
        f|=s=='-';
        s=getchar();
    }
    while(s!=EOF&&isdigit(s)){
        x=x*10+s-48;
        s=getchar();
    }
    x=f?-x:x;
}

// Digit stack shared by qw; 30 usable slots, enough for any 64-bit value.
int cc=0,buf[31];

// Writes x to stdout in decimal, without a trailing newline.
// Digits are taken as magnitudes of x%10 rather than by negating x first, so
// the most negative value of T is printed correctly (negating it overflows).
template<typename T>inline void qw(T x){
    if(x<0)putchar('-');
    do{
        int d=int(x%10);
        buf[++cc]=d<0?-d:d;
        x/=10;
    }while(x);
    while(cc)putchar(buf[cc--]+'0');
}

const int N=2e5+10,mod=998244353;

// Returns a^b modulo `mod` by binary exponentiation (expects b >= 0).
int power(int a,int b){
    int ret=1;
    while(b){
        if(b&1)ret=1ll*ret*a%mod;
        a=1ll*a*a%mod;
        b>>=1;
    }
    return ret;
}

int n;
char s[N],s2[N];//s2 -> reverse s
// sa[r]   : start index in s2 of the suffix with rank r (1-based)
// rk[i]   : rank of the suffix of s2 starting at i (inverse of sa)
// height[r]: LCP length of the suffixes ranked r-1 and r (r >= 2)
int height[N],sa[N],rk[N];

// One adjacent pair of ranks (id-1, id) together with its LCP h.
struct node{
    int h,id;
}a[N];

// Union-find over ranks; id[root of a set] is the tree node currently
// representing that set. id is reused later in solve as a depth -> node map.
int fa[N],id[N];
int p[N];
// suf[i]  : product of (1-p[j]) for j >= i, modulo mod
// sum[r]  : prefix product over ranks 1..r of the p value attached to each
//           rank, with zero factors replaced by 1
// isum[r] : modular inverse of sum[r]
int sum[N],isum[N],suf[N];
// zero[r] : number of ranks in 1..r whose p value is 0
int zero[N];

// Union-find lookup with path compression.
int findfa(int x){return x==fa[x]?x:fa[x]=findfa(fa[x]);}

// Node of the compressed suffix trie.
// [l,r] is the range of suffix ranks inside its subtree, lenL..lenR are the
// depths of the trie vertices it stands for, fa is its parent (0 if unset).
struct tree{
    int l,r,lenL,lenR,fa;
}t[N<<1];
int cnt,root;
int val[N<<1];// not referenced by the code in this section
int tp,sta[N];

namespace SA{//use s2 to create height[]
int c[N],x[N],y[N];

// Builds sa, rk and height for s2[1..n] (prefix doubling with counting sort,
// then an incremental LCP pass).
void getSA(){
    int m=256;
    // Clear whatever an earlier call left behind: the initial counting pass
    // assumes c is zero, and the rank comparison below may read index n+1.
    rep(i,0,m)c[i]=0;
    x[n+1]=y[n+1]=0;

    rep(i,1,n)c[x[i]=s2[i]]++;
    rep(i,2,m)c[i]+=c[i-1];
    dwn(i,n,1)sa[c[x[i]]--]=i;
    for(int k=1;k<=n;k<<=1){
        // y lists the suffixes ordered by their second key (the k characters
        // after the first k); suffixes with no second key come first.
        int num=0;
        rep(i,n-k+1,n)y[++num]=i;
        rep(i,1,n)if(sa[i]>k)y[++num]=sa[i]-k;
        // Stable counting sort by first key.
        rep(i,1,m)c[i]=0;
        rep(i,1,n)c[x[i]]++;
        rep(i,2,m)c[i]+=c[i-1];
        dwn(i,n,1)sa[c[x[y[i]]]--]=y[i],y[i]=0;
        // y now holds the previous ranks, x receives the new ones.
        swap(x,y);
        x[sa[1]]=num=1;
        rep(i,2,n)x[sa[i]]=(y[sa[i]]==y[sa[i-1]]&&y[sa[i]+k]==y[sa[i-1]+k])?num:++num;
        if(num==n)break;
        m=num;
    }
    rep(i,1,n)rk[sa[i]]=i;
    int k=0;
    rep(i,1,n){
        if(rk[i]==1)continue;
        if(k)k--;
        int j=sa[rk[i]-1];
        while(i+k<=n&&j+k<=n&&s2[i+k]==s2[j+k])k++;
        height[rk[i]]=k;
    }
}
}

// Orders rank pairs by LCP descending, ties by rank ascending.
bool cmp(node p1,node p2){
    //you have to sort the id, or the structure of the tree will have some problems
    if(p1.h==p2.h)return p1.id<p2.id;
    return p1.h>p2.h;
}

// Returns 1 when no rank in [x,y] has a p value of 0, otherwise 0.
bool iszero(int x,int y){
    if(zero[y]-zero[x-1])return 0;
    return 1;
}

// Product (mod `mod`) of the p values attached to ranks l..r; 0 as soon as
// one of them is 0.
static int rangeProduct(int l,int r){
    if(!iszero(l,r))return 0;
    return int(1ll*sum[r]*isum[l-1]%mod);
}

// Length of the common prefix of s and the suffix of s2 with the given rank.
static int lcpWithS(int rank){
    int len=0;
    rep(i,1,n+1-sa[rank]){
        if(s[i]==s2[sa[rank]+i-1])len++;
        else break;
    }
    return len;
}

// Reads one test case (n, the string, then p[1..n]) from stdin and prints the
// answer followed by a newline. Returns without output when n is outside
// 1..N-2 or the string cannot be read.
void solve(){
    qr(n);
    if(n<1||n>N-2)return;
    if(scanf("%s",s+1)!=1)return;
    rep(i,1,n)qr(p[i]);
    // The whole case is consumed before this early exit so that a following
    // call starts at the next case.
    if(n==1){puts("2");return;}

    // Per-call state: these are only ever incremented below.
    cnt=tp=root=0;
    rep(i,0,n)zero[i]=0;

    suf[n+1]=1;
    dwn(i,n,1)suf[i]=1ll*suf[i+1]*(mod+1-p[i])%mod;
    rep(i,1,n)s2[n+1-i]=s[i];
    SA::getSA();

    //build the merged suffix trie
    rep(i,2,n)a[i-1]=node{height[i],i};
    sort(a+1,a+n,cmp);
    rep(i,1,n){
        t[++cnt]=tree{i,i,0,n-sa[i]+1};
        fa[i]=i;
        id[i]=i;
    }
    rep(i,1,n-1){
        int now=a[i].id;
        int x=id[findfa(now-1)],y=id[findfa(now)];
        if(t[x].lenR==a[i].h){
            // The left group already ends at depth h: hang y below it.
            t[y].fa=x;
            t[y].lenL=a[i].h+1;
            t[x].l=min(t[x].l,t[y].l);
            t[x].r=max(t[x].r,t[y].r);
            fa[findfa(now)]=findfa(now-1);
        }
        else if(t[y].lenR==a[i].h){
            // Symmetric case: the right group ends at depth h.
            t[x].fa=y;
            t[x].lenL=a[i].h+1;
            t[y].l=min(t[y].l,t[x].l);
            t[y].r=max(t[y].r,t[x].r);
            fa[findfa(now-1)]=findfa(now);
        }
        else{
            // Neither ends at depth h: create a new parent ending there.
            t[x].lenL=t[y].lenL=a[i].h+1;
            t[x].fa=t[y].fa=++cnt;
            t[cnt]=tree{min(t[x].l,t[y].l),max(t[x].r,t[y].r),0,a[i].h};
            fa[findfa(now)]=findfa(now-1);
            id[findfa(now-1)]=cnt;
        }
    }
    rep(i,1,cnt){
        if(t[i].l==1&&t[i].r==n){root=i;break;}
    }

    //we precalculate the ans of the suffix trie
    //and then update the value of the vertex in the prefix
    rep(i,0,n)sum[i]=1;
    rep(i,1,n){
        // The suffix of s2 starting at n+1-i is the reversed prefix of length i.
        int pos=rk[n+1-i];
        sum[pos]=p[i];
    }
    isum[0]=1;
    rep(i,1,n){
        // Zeros are counted separately so the prefix products stay invertible.
        if(!sum[i]){sum[i]=1;zero[i]=1;}
        sum[i]=1ll*sum[i]*sum[i-1]%mod;
        zero[i]+=zero[i-1];
        isum[i]=power(sum[i],mod-2);
    }
    int ans=0;
    rep(i,1,cnt){
        int num=t[i].lenR-t[i].lenL+1;
        int E=(mod+1-rangeProduct(t[i].l,t[i].r))%mod;
        ans=(ans+1ll*num*E%mod)%mod;
    }

    //we binary search the lexicographic order to find the path of prefix in the trie
    // pos becomes the smallest rank whose suffix is larger than s at their
    // first mismatch; a suffix matching s over its whole length is not larger.
    int l=1,r=n,mid,pos=n+1;
    while(l<=r){
        mid=(l+r)/2;
        int flag=0,len=n+1-sa[mid];
        rep(i,1,len){
            if(s[i]<s2[sa[mid]+i-1]){flag=1;break;}
            if(s[i]>s2[sa[mid]+i-1]){flag=2;break;}
        }
        if(!flag)flag=2;
        if(flag==1)pos=mid,r=mid-1;
        else l=mid+1;
    }
    // Of the two neighbouring ranks keep the one sharing the longer prefix
    // with s (the lower rank on a tie).
    int t1=0,t2=0;
    if(1<=pos&&pos<=n)t2=lcpWithS(pos);
    pos--;
    if(1<=pos&&pos<=n)t1=lcpWithS(pos);
    if(t1<t2)pos++;
    int match=max(t1,t2);

    // Collect the nodes from that leaf up to the root.
    while(1){
        sta[++tp]=pos;
        if(pos==root)break;
        pos=t[pos].fa;
    }
    memset(id,0,sizeof(id));//reuse id
    // id[d] = node on the path that covers depth d, for d = 0..match.
    rep(i,1,tp){
        int x=sta[i];
        rep(j,t[x].lenL,min(match,t[x].lenR)){id[j]=x;}
    }

    //find how many vertex will certainly exist
    //if the subtree include the prefix and the suffix(binary search)
    l=1;
    r=match;
    pos=0;
    while(l<=r){
        mid=(l+r)/2;
        bool bk=0;
        int x=id[mid];
        rep(i,mid,n){
            int tt=rk[n+1-i];
            if(t[x].l<=tt&&tt<=t[x].r){bk=1;break;}
        }
        if(bk)pos=mid,l=mid+1;
        else r=mid-1;
    }

    // Replace the contribution of every depth that lies on the path of s.
    rep(i,0,match){
        int x=id[i];
        int E=(mod+1-rangeProduct(t[x].l,t[x].r))%mod;
        ans=(ans-E+mod)%mod;
        if(i<=pos)E=1;
        else{
            E=rangeProduct(t[x].l,t[x].r);
            E=1ll*E*suf[i]%mod;
            E=(mod+1-E)%mod;
        }
        ans=(ans+E)%mod;
    }
    // Depths past the match are reached only through prefixes of s itself.
    rep(i,match+1,n){
        int E=(mod+1-suf[i])%mod;
        ans=(ans+E)%mod;
    }
    qw(ans);
    puts("");
}
// Program entry point: runs the solver for a fixed count of one test case.
int main(){
    for(int remaining=1;remaining>0;--remaining){
        solve();
    }
    return 0;
}