重温AC自动机(附7题)

tonyfang posted @ 2016年12月13日 22:32 in 随笔 with tags c++ OI , 226 阅读

最近在写一些字符串的题呀……

头昏脑涨……

关于AC自动机的应用,一种是模板;一种是关于fail的DP(经常加上矩阵);另外就是关于fail树的问题(本篇不讨论,后续会新写文章介绍,本篇着重前两个内容)。

【模板】

HDU 2222

注意一个结尾可能有多个单词,用类似于边链表存储即可。

# include <stdio.h>
# include <algorithm>
# include <string.h>
# include <queue>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 600010;
const int N = 26;
int ch[M][N], fail[M], lst[M], cnt[M], sz=0;
int isend[M];
char str[M*2];

inline void insert() {
	int len = strlen(str), p=0;
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] ++;
}

queue<int> q;
inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<26; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = lst[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top = q.front(); q.pop();
		for (int c=0; c<26; ++c) {
			int p = ch[top][c];
			if(!p) continue;
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = fail[v];
			fail[p] = ch[v][c];
			lst[p] = isend[fail[p]] ? fail[p] : lst[fail[p]];
		}
	}
}

inline void tadd(int x) {for (; x; x=lst[x]) cnt[x] ++;}

inline void find() {
	int p=0, len = strlen(str);
	memset(cnt, 0, sizeof(cnt));
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		while(p && !ch[p][c]) p = fail[p];
		p = ch[p][c];
		if(isend[p]) tadd(p);
		else if(lst[p]) tadd(lst[p]);
	}
}

int n, ans, T;

int main() {
	scanf("%d", &T);
	while(T--) {
		memset(ch, 0, sizeof(ch));
		memset(lst, 0, sizeof(lst));
		memset(isend, 0, sizeof(isend));
		memset(fail, 0, sizeof(fail));
		sz = ans = 0;
		scanf("%d", &n);
		for (int i=1; i<=n; ++i) {
			scanf("%s", str);
			insert();
		}
		getfail();
		scanf("%s", str);
		find();
		for (int i=1; i<=sz; ++i) 
			if(isend[i] && cnt[i]) ans+=isend[i];
		printf("%d\n", ans);
	}
	return 0;
}

BZOJ 3172

# include <stdio.h>
# include <algorithm>
# include <string.h>
# include <queue>
# include <vector>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 1600010;
const int N = 27;
int ch[M][N], fail[M], lst[M], cnt[M], sz=0;
int isend[M];
char str[M];
char t[M];
vector<int> v[M];
int ans[M];

inline void insert(int j) {
	int len = strlen(t), p=0;
	for (int i=0; i<len; ++i) {
		int c = t[i] - 'a';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] ++;
	v[p].push_back(j);
}

queue<int> q;
inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<26; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = lst[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top = q.front(); q.pop();
		for (int c=0; c<26; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = fail[v];
			fail[p] = ch[v][c];
			lst[p] = isend[fail[p]] ? fail[p] : lst[fail[p]];
		}
	}
}

inline void tadd(int x) {for (; x; x=lst[x]) cnt[x] ++;}

inline void find() {
	int p=0, len = strlen(str);
	memset(cnt, 0, sizeof(cnt));
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		while(p && !ch[p][c]) p = fail[p];
		p = ch[p][c];
		if(isend[p]) tadd(p);
		else if(lst[p]) tadd(lst[p]);
	}
}

int n, T;

int main() {
	sz = 0;
	scanf("%d", &n);
	int cur = 0;
	for (int i=1; i<=n; ++i) {
		scanf("%s", t);
		insert(i);
		for (int j=0, jto=strlen(t); j<jto; ++j)
			str[cur++]=t[j];
		str[cur++]='a'+26;
	}
	getfail();
	find();
	for (int i=1; i<=sz; ++i) 
		if(isend[i]) {
			for (int j=0, jto=v[i].size(); j<jto; ++j)
				ans[v[i][j]] = cnt[i];
		}
	for (int i=1; i<=n; ++i)
		printf("%d\n", ans[i]);
	return 0;
}

【应用】

1. BZOJ 1009 GT考试

不能包含不吉利的串,就是在AC自动机上走若干次,求不包含的种类。

不能包含的要求:不能被转移到,不能转移其他(所以要两次判断)

# include <stdio.h>
# include <algorithm>
# include <queue>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 500010;
const int N = 10;
const int MM = 25;
int n, m, mod;
char str[MM];

int ch[MM][N], sz=0, fail[MM];
bool isend[M];
queue<int> q;

struct mat {
	int n, m;
	int a[MM][MM];
	inline void init(int _n, int _m) {
		n=_n, m=_m;
		memset(a, 0, sizeof(a));
	}
};

inline void insert() {
	int p=0, len = strlen(str);
	for (int i=0; i<len; ++i) {
		int c = str[i] - '0';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
}

inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<10; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top=q.front(); q.pop();
		for (int c=0; c<10; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = ch[v][c];
			fail[p] = ch[v][c];
			if(isend[fail[top]]) isend[top] = 1;
		}
	}
}

inline mat build() {
	mat ret; ret.init(sz+1, sz+1);
	for (int i=0; i<=sz; ++i) {
		if(isend[i]) continue;
		for (int c=0; c<10; ++c) {
			if(isend[ch[i][c]]) continue;
			ret.a[i+1][ch[i][c]+1] ++;
		}
	}
	return ret;
}

inline mat mul(mat a, mat b) {
	mat ret;
	ret.init(a.n, b.m);
	for (int i=1; i<=ret.n; ++i)
		for (int j=1; j<=ret.m; ++j) 
			for (int k=1; k<=a.m; ++k) 
				ret.a[i][j] = (ret.a[i][j] + a.a[i][k]*b.a[k][j]%mod)%mod;
	return ret;
}

inline mat pwr(mat a, int b) {
	mat ret;
	ret.init(a.n, a.m);
	for (int i=1; i<=ret.n; ++i)
		ret.a[i][i] = 1;
	while(b) {
		if(b&1) ret = mul(ret, a);
		a = mul(a, a);
		b >>= 1;
	}
	return ret;
}

int main() {
	scanf("%d%d%d", &n, &m, &mod);
	scanf("%s", str);
	insert();
	getfail();
	mat ans = pwr(build(), n);
	int anss = 0;
	for (int i=1; i<=sz+1; ++i)
		anss = (anss + ans.a[1][i]) % mod;
	printf("%d\n", anss);
	return 0;
}

2. BZOJ 1030 文本生成器

基本同1009,只是可以不用矩阵乘法加速。(也要两次判断)

# include <stdio.h>
# include <algorithm>
# include <string.h>
# include <queue>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M3 = 6010, M = 60010;
const int N = 26;
const int mod = 10007;
int ch[M][N], fail[M], sz=0;
bool isend[M];
char str[M*2];
int f[M3][M3];

inline void insert() {
	int len = strlen(str), p=0;
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'A';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
}

queue<int> q;
inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<26; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top = q.front(); q.pop();
		for (int c=0; c<26; ++c) {
			int p = ch[top][c];
			if(!p) {
			    ch[top][c]=ch[fail[top]][c];
			    continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = fail[v];
			fail[p] = ch[v][c];
			if(isend[fail[p]]) isend[p] = 1;
		}
	}
}

int n, m;

int main() {
	memset(isend, 0, sizeof(isend));
	memset(ch, 0, sizeof(ch));
	scanf("%d%d", &n, &m);
	for (int i=1; i<=n; ++i) {
		scanf("%s", str);
		insert();
	}
	getfail();
	f[0][0] = 1;
	for (int i=1; i<=m; ++i) {
		for (int j=0; j<=sz; ++j) {
			if(isend[j]) continue;
			for (int k=0; k<26; ++k) {
                if(isend[ch[j][k]]) continue;
				f[i][ch[j][k]] = (f[i][ch[j][k]] + f[i-1][j]) % mod;
			}
		}
	}
	int s = 0, tot = 1;
	for (int i=0; i<=sz; ++i) if(!isend[i]) s = (s+f[m][i]) % mod;
	for (int i=1; i<=m; ++i) tot = tot * 26 % mod;
	tot -= s;
	tot = (tot + mod) % mod;
	printf("%d\n", tot);
	return 0;
}
/*
3 7
AAB
ACC
B
*/

3. BZOJ 2553 禁忌

本题的特殊是求期望,看上去无法用AC自动机来做,实质上我们分解期望,发现每次都是乘一个1/alpha,这个是固定的,然后禁忌的段的数目的最大值可以贪心在AC自动机上走来实现,所以中体就是一个AC自动机的思想了。本题特殊之处在于,我们需要新开一维矩阵来实现统计答案(因为如果走到一个isend,答案应该加一,对应加1/alpha),答案那维的转移永远是1。其他照样做即可。本题end能转移出去,被转移到的时候要特判,所以只有一个判断。

# include <stdio.h>
# include <algorithm>
# include <queue>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 500010;
const int N = 26;
const int MM = 255;
int n, m, alpha;
char str[MM];

int ch[MM][N], sz=0, fail[MM];
bool isend[MM];
queue<int> q;


struct mat {
	int n, m;
	ld a[MM][MM];
	inline void init(int _n, int _m) {
		n=_n, m=_m;
		memset(a, 0, sizeof(a));
	}
};

inline void insert() {
	int p=0, len = strlen(str);
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
}

inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<alpha; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top=q.front(); q.pop();
		for (int c=0; c<alpha; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = ch[v][c];
			fail[p] = ch[v][c];
			if(isend[fail[top]]) isend[top] = 1;
		}
	}
}

inline mat build() {
	mat ret; ret.init(sz+2, sz+2);
	ld tmp = (ld)1.0/(ld)alpha;
	for (int i=0; i<=sz; ++i) {
		for (int c=0; c<alpha; ++c) {
			if(isend[ch[i][c]]) {
				ret.a[i+1][1] += tmp;
				ret.a[i+1][sz+2] += tmp;
				continue;
			}
			ret.a[i+1][ch[i][c]+1] += tmp;
		}
	}
	ret.a[sz+2][sz+2]=1.0;
	return ret;
}

inline mat mul(mat a, mat b) {
	mat ret;
	ret.init(a.n, b.m);
	for (int i=1; i<=ret.n; ++i)
		for (int j=1; j<=ret.m; ++j) 
			for (int k=1; k<=a.m; ++k) 
				ret.a[i][j] = ret.a[i][j] + a.a[i][k]*b.a[k][j];
	return ret;
}

inline mat pwr(mat a, int b) {
	mat ret;
	ret.init(a.n, a.m);
	for (int i=1; i<=ret.n; ++i)
		ret.a[i][i] = 1;
	while(b) {
		if(b&1) ret = mul(ret, a);
		a = mul(a, a);
		b >>= 1;
	}
	return ret;
}

int main() {
	scanf("%d%d%d", &m, &n, &alpha);
	while(m--) {
		scanf("%s", str);
		insert();
	}
	getfail();
	mat ans = pwr(build(), n);
	printf("%.10lf\n", (double)ans.a[1][sz+2]);
	return 0;
}

4. BZOJ 1212 L语言

判断一个文章是否能被一个字库表示出来。

直接trie上dp即可,不需要AC自动机。

f[i]表示到第i个单词可行吗

找到最大的i使得f[i]=1即可。

# include <stdio.h>
# include <algorithm>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 1200010;

int ch[M][26], sz=0;
bool isend[M];
char str[M];
int f[M];
int n, m;

inline void insert() {
	int len=strlen(str), p=0;
	for (int i=0; i<len; ++i) {
		int c = str[i]-'a';
		if(!ch[p][c]) ch[p][c]=++sz;
		p=ch[p][c];
	}
	isend[p]=1;
}

inline int solve(int x) {
	int len=strlen(str+1);
	f[0]=x;
	int ret = 0;
	for (int i=0; i<=len; ++i) {
		if(f[i] == x) ret = i;
		else continue;
		for (int p=0, j=i+1; j<=len; ++j) {
			p = ch[p][str[j]-'a'];
			if(!p) break;
			if(isend[p]) f[j] = x;
		} 
	}
	return ret;
}
int main() {
	scanf("%d%d", &n, &m);
	for (int i=1; i<=n; ++i) {
		scanf("%s", str);
		insert();
	}
	for (int i=1; i<=m; ++i) {
		scanf("%s", str+1);
		printf("%d\n", solve(i));
	}

	return 0;
}

5. BZOJ 1444 有趣的游戏

题面复杂,略去

还是关注如何判断,能转移到这个状态,然后不能转移出去(之后应该全是1),所以只要一个判断。

全是1的意义同第3题,就是不会再被其他改变的意思。(因为是求某人赢的概率)

还有就是要做足够多次的矩乘,我这里做了$2^{50}$次。

# include <stdio.h>
# include <algorithm>
# include <queue>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 500010;
const int N = 27;
const int MM = 510;
int n, m, TSN=0, l;
char str[MM];

int ch[MM][N], sz=0, fail[MM];
bool isend[MM];
ld pos[MM];
int ps[MM];
queue<int> q;


struct mat {
	int n, m;
	ld a[MM][MM];
	inline void init(int _n, int _m) {
		n=_n, m=_m;
		memset(a, 0, sizeof(a));
	}
};

inline void insert() {
	int p=0;
	for (int i=0; i<l; ++i) {
		int c = str[i] - 'A';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
	ps[++TSN] = p;
}

inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<m; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top=q.front(); q.pop();
		for (int c=0; c<m; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = ch[v][c];
			fail[p] = ch[v][c];
			if(isend[fail[top]]) isend[top] = 1;
		}
	}
}

inline mat build() {
	mat ret; ret.init(sz+1, sz+1);
	for (int i=0; i<=sz; ++i) {
		if(isend[i])
			ret.a[i+1][i+1] = 1.0;
		else for (int c=0; c<m; ++c)
			ret.a[i+1][ch[i][c]+1] += pos[c];
	}
	return ret;
}

inline mat mul(mat a, mat b) {
	mat ret;
	ret.init(a.n, b.m);
	for (int i=1; i<=ret.n; ++i)
		for (int j=1; j<=ret.m; ++j) 
			for (int k=1; k<=a.m; ++k) 
				ret.a[i][j] = ret.a[i][j] + a.a[i][k]*b.a[k][j];
	return ret;
}

inline mat pwr(mat a, int b) {
	mat ret;
	ret.init(a.n, a.m);
	for (int i=1; i<=ret.n; ++i)
		ret.a[i][i] = 1;
	while(b) {
		if(b&1) ret = mul(ret, a);
		a = mul(a, a);
		b >>= 1;
	}
	return ret;
}

int main() {
	scanf("%d%d%d", &n, &l, &m);
	for (int i=0; i<m; ++i) {
		int p, q; scanf("%d%d", &p, &q);
		pos[i] = (ld)p/(ld)q;
	}
	for (int i=1; i<=n; ++i) {
		scanf("%s", str);
		insert();
	}
	getfail();
	mat ans=build();
	for (int i=1; i<=50; ++i) ans=mul(ans,ans);
	for (int i=1; i<=n; ++i) 
		printf("%.2lf\n", (double)ans.a[1][ps[i]+1]);
	return 0;
}
/*
3 2 2
1 2
1 2
AB
BA
AA
*/

 

 


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter