重温AC自动机(附7题)

tonyfang posted @ 2016年12月13日 22:32 in 随笔 with tags c++ OI , 1927 阅读

最近在写一些字符串的题呀……

头昏脑涨……

关于AC自动机的应用,一种是模板;一种是关于fail的DP(经常加上矩阵);另外就是关于fail树的问题(本篇不讨论,后续会新写文章介绍,本篇着重前两个内容)。

【模板】

HDU 2222

注意一个结尾可能有多个单词,用类似于边链表存储即可。

# include <stdio.h>
# include <algorithm>
# include <string.h>
# include <queue>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 600010;
const int N = 26;
int ch[M][N], fail[M], lst[M], cnt[M], sz=0;
int isend[M];
char str[M*2];

inline void insert() {
	int len = strlen(str), p=0;
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] ++;
}

queue<int> q;
inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<26; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = lst[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top = q.front(); q.pop();
		for (int c=0; c<26; ++c) {
			int p = ch[top][c];
			if(!p) continue;
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = fail[v];
			fail[p] = ch[v][c];
			lst[p] = isend[fail[p]] ? fail[p] : lst[fail[p]];
		}
	}
}

inline void tadd(int x) {for (; x; x=lst[x]) cnt[x] ++;}

inline void find() {
	int p=0, len = strlen(str);
	memset(cnt, 0, sizeof(cnt));
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		while(p && !ch[p][c]) p = fail[p];
		p = ch[p][c];
		if(isend[p]) tadd(p);
		else if(lst[p]) tadd(lst[p]);
	}
}

int n, ans, T;

int main() {
	scanf("%d", &T);
	while(T--) {
		memset(ch, 0, sizeof(ch));
		memset(lst, 0, sizeof(lst));
		memset(isend, 0, sizeof(isend));
		memset(fail, 0, sizeof(fail));
		sz = ans = 0;
		scanf("%d", &n);
		for (int i=1; i<=n; ++i) {
			scanf("%s", str);
			insert();
		}
		getfail();
		scanf("%s", str);
		find();
		for (int i=1; i<=sz; ++i) 
			if(isend[i] && cnt[i]) ans+=isend[i];
		printf("%d\n", ans);
	}
	return 0;
}

BZOJ 3172

# include <stdio.h>
# include <algorithm>
# include <string.h>
# include <queue>
# include <vector>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 1600010;
const int N = 27;
int ch[M][N], fail[M], lst[M], cnt[M], sz=0;
int isend[M];
char str[M];
char t[M];
vector<int> v[M];
int ans[M];

inline void insert(int j) {
	int len = strlen(t), p=0;
	for (int i=0; i<len; ++i) {
		int c = t[i] - 'a';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] ++;
	v[p].push_back(j);
}

queue<int> q;
inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<26; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = lst[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top = q.front(); q.pop();
		for (int c=0; c<26; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = fail[v];
			fail[p] = ch[v][c];
			lst[p] = isend[fail[p]] ? fail[p] : lst[fail[p]];
		}
	}
}

inline void tadd(int x) {for (; x; x=lst[x]) cnt[x] ++;}

inline void find() {
	int p=0, len = strlen(str);
	memset(cnt, 0, sizeof(cnt));
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		while(p && !ch[p][c]) p = fail[p];
		p = ch[p][c];
		if(isend[p]) tadd(p);
		else if(lst[p]) tadd(lst[p]);
	}
}

int n, T;

int main() {
	sz = 0;
	scanf("%d", &n);
	int cur = 0;
	for (int i=1; i<=n; ++i) {
		scanf("%s", t);
		insert(i);
		for (int j=0, jto=strlen(t); j<jto; ++j)
			str[cur++]=t[j];
		str[cur++]='a'+26;
	}
	getfail();
	find();
	for (int i=1; i<=sz; ++i) 
		if(isend[i]) {
			for (int j=0, jto=v[i].size(); j<jto; ++j)
				ans[v[i][j]] = cnt[i];
		}
	for (int i=1; i<=n; ++i)
		printf("%d\n", ans[i]);
	return 0;
}

【应用】

1. BZOJ 1009 GT考试

不能包含不吉利的串,就是在AC自动机上走若干次,求不包含的种类。

不能包含的要求:不能被转移到,不能转移其他(所以要两次判断)

# include <stdio.h>
# include <algorithm>
# include <queue>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 500010;
const int N = 10;
const int MM = 25;
int n, m, mod;
char str[MM];

int ch[MM][N], sz=0, fail[MM];
bool isend[M];
queue<int> q;

struct mat {
	int n, m;
	int a[MM][MM];
	inline void init(int _n, int _m) {
		n=_n, m=_m;
		memset(a, 0, sizeof(a));
	}
};

inline void insert() {
	int p=0, len = strlen(str);
	for (int i=0; i<len; ++i) {
		int c = str[i] - '0';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
}

inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<10; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top=q.front(); q.pop();
		for (int c=0; c<10; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = ch[v][c];
			fail[p] = ch[v][c];
			if(isend[fail[top]]) isend[top] = 1;
		}
	}
}

inline mat build() {
	mat ret; ret.init(sz+1, sz+1);
	for (int i=0; i<=sz; ++i) {
		if(isend[i]) continue;
		for (int c=0; c<10; ++c) {
			if(isend[ch[i][c]]) continue;
			ret.a[i+1][ch[i][c]+1] ++;
		}
	}
	return ret;
}

inline mat mul(mat a, mat b) {
	mat ret;
	ret.init(a.n, b.m);
	for (int i=1; i<=ret.n; ++i)
		for (int j=1; j<=ret.m; ++j) 
			for (int k=1; k<=a.m; ++k) 
				ret.a[i][j] = (ret.a[i][j] + a.a[i][k]*b.a[k][j]%mod)%mod;
	return ret;
}

inline mat pwr(mat a, int b) {
	mat ret;
	ret.init(a.n, a.m);
	for (int i=1; i<=ret.n; ++i)
		ret.a[i][i] = 1;
	while(b) {
		if(b&1) ret = mul(ret, a);
		a = mul(a, a);
		b >>= 1;
	}
	return ret;
}

int main() {
	scanf("%d%d%d", &n, &m, &mod);
	scanf("%s", str);
	insert();
	getfail();
	mat ans = pwr(build(), n);
	int anss = 0;
	for (int i=1; i<=sz+1; ++i)
		anss = (anss + ans.a[1][i]) % mod;
	printf("%d\n", anss);
	return 0;
}

2. BZOJ 1030 文本生成器

基本同1009,只是可以不用矩阵乘法加速。(也要两次判断)

# include <stdio.h>
# include <algorithm>
# include <string.h>
# include <queue>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M3 = 6010, M = 60010;
const int N = 26;
const int mod = 10007;
int ch[M][N], fail[M], sz=0;
bool isend[M];
char str[M*2];
int f[M3][M3];

inline void insert() {
	int len = strlen(str), p=0;
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'A';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
}

queue<int> q;
inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<26; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top = q.front(); q.pop();
		for (int c=0; c<26; ++c) {
			int p = ch[top][c];
			if(!p) {
			    ch[top][c]=ch[fail[top]][c];
			    continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = fail[v];
			fail[p] = ch[v][c];
			if(isend[fail[p]]) isend[p] = 1;
		}
	}
}

int n, m;

int main() {
	memset(isend, 0, sizeof(isend));
	memset(ch, 0, sizeof(ch));
	scanf("%d%d", &n, &m);
	for (int i=1; i<=n; ++i) {
		scanf("%s", str);
		insert();
	}
	getfail();
	f[0][0] = 1;
	for (int i=1; i<=m; ++i) {
		for (int j=0; j<=sz; ++j) {
			if(isend[j]) continue;
			for (int k=0; k<26; ++k) {
                if(isend[ch[j][k]]) continue;
				f[i][ch[j][k]] = (f[i][ch[j][k]] + f[i-1][j]) % mod;
			}
		}
	}
	int s = 0, tot = 1;
	for (int i=0; i<=sz; ++i) if(!isend[i]) s = (s+f[m][i]) % mod;
	for (int i=1; i<=m; ++i) tot = tot * 26 % mod;
	tot -= s;
	tot = (tot + mod) % mod;
	printf("%d\n", tot);
	return 0;
}
/*
3 7
AAB
ACC
B
*/

3. BZOJ 2553 禁忌

本题的特殊是求期望,看上去无法用AC自动机来做,实质上我们分解期望,发现每次都是乘一个1/alpha,这个是固定的,然后禁忌的段的数目的最大值可以贪心在AC自动机上走来实现,所以中体就是一个AC自动机的思想了。本题特殊之处在于,我们需要新开一维矩阵来实现统计答案(因为如果走到一个isend,答案应该加一,对应加1/alpha),答案那维的转移永远是1。其他照样做即可。本题end能转移出去,被转移到的时候要特判,所以只有一个判断。

# include <stdio.h>
# include <algorithm>
# include <queue>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 500010;
const int N = 26;
const int MM = 255;
int n, m, alpha;
char str[MM];

int ch[MM][N], sz=0, fail[MM];
bool isend[MM];
queue<int> q;


struct mat {
	int n, m;
	ld a[MM][MM];
	inline void init(int _n, int _m) {
		n=_n, m=_m;
		memset(a, 0, sizeof(a));
	}
};

inline void insert() {
	int p=0, len = strlen(str);
	for (int i=0; i<len; ++i) {
		int c = str[i] - 'a';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
}

inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<alpha; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top=q.front(); q.pop();
		for (int c=0; c<alpha; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = ch[v][c];
			fail[p] = ch[v][c];
			if(isend[fail[top]]) isend[top] = 1;
		}
	}
}

inline mat build() {
	mat ret; ret.init(sz+2, sz+2);
	ld tmp = (ld)1.0/(ld)alpha;
	for (int i=0; i<=sz; ++i) {
		for (int c=0; c<alpha; ++c) {
			if(isend[ch[i][c]]) {
				ret.a[i+1][1] += tmp;
				ret.a[i+1][sz+2] += tmp;
				continue;
			}
			ret.a[i+1][ch[i][c]+1] += tmp;
		}
	}
	ret.a[sz+2][sz+2]=1.0;
	return ret;
}

inline mat mul(mat a, mat b) {
	mat ret;
	ret.init(a.n, b.m);
	for (int i=1; i<=ret.n; ++i)
		for (int j=1; j<=ret.m; ++j) 
			for (int k=1; k<=a.m; ++k) 
				ret.a[i][j] = ret.a[i][j] + a.a[i][k]*b.a[k][j];
	return ret;
}

inline mat pwr(mat a, int b) {
	mat ret;
	ret.init(a.n, a.m);
	for (int i=1; i<=ret.n; ++i)
		ret.a[i][i] = 1;
	while(b) {
		if(b&1) ret = mul(ret, a);
		a = mul(a, a);
		b >>= 1;
	}
	return ret;
}

int main() {
	scanf("%d%d%d", &m, &n, &alpha);
	while(m--) {
		scanf("%s", str);
		insert();
	}
	getfail();
	mat ans = pwr(build(), n);
	printf("%.10lf\n", (double)ans.a[1][sz+2]);
	return 0;
}

4. BZOJ 1212 L语言

判断一个文章是否能被一个字库表示出来。

直接trie上dp即可,不需要AC自动机。

f[i]表示到第i个单词可行吗

找到最大的i使得f[i]=1即可。

# include <stdio.h>
# include <algorithm>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 1200010;

int ch[M][26], sz=0;
bool isend[M];
char str[M];
int f[M];
int n, m;

inline void insert() {
	int len=strlen(str), p=0;
	for (int i=0; i<len; ++i) {
		int c = str[i]-'a';
		if(!ch[p][c]) ch[p][c]=++sz;
		p=ch[p][c];
	}
	isend[p]=1;
}

inline int solve(int x) {
	int len=strlen(str+1);
	f[0]=x;
	int ret = 0;
	for (int i=0; i<=len; ++i) {
		if(f[i] == x) ret = i;
		else continue;
		for (int p=0, j=i+1; j<=len; ++j) {
			p = ch[p][str[j]-'a'];
			if(!p) break;
			if(isend[p]) f[j] = x;
		} 
	}
	return ret;
}
int main() {
	scanf("%d%d", &n, &m);
	for (int i=1; i<=n; ++i) {
		scanf("%s", str);
		insert();
	}
	for (int i=1; i<=m; ++i) {
		scanf("%s", str+1);
		printf("%d\n", solve(i));
	}

	return 0;
}

5. BZOJ 1444 有趣的游戏

题面复杂,略去

还是关注如何判断,能转移到这个状态,然后不能转移出去(之后应该全是1),所以只要一个判断。

全是1的意义同第3题,就是不会再被其他改变的意思。(因为是求某人赢的概率)

还有就是要做足够多次的矩乘,我这里做了$2^{50}$次。

# include <stdio.h>
# include <algorithm>
# include <queue>
# include <string.h>
// # include <bits/stdc++.h>

using namespace std;

typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;
const int M = 500010;
const int N = 27;
const int MM = 510;
int n, m, TSN=0, l;
char str[MM];

int ch[MM][N], sz=0, fail[MM];
bool isend[MM];
ld pos[MM];
int ps[MM];
queue<int> q;


struct mat {
	int n, m;
	ld a[MM][MM];
	inline void init(int _n, int _m) {
		n=_n, m=_m;
		memset(a, 0, sizeof(a));
	}
};

inline void insert() {
	int p=0;
	for (int i=0; i<l; ++i) {
		int c = str[i] - 'A';
		if(!ch[p][c]) ch[p][c] = ++sz;
		p = ch[p][c];
	}
	isend[p] = 1;
	ps[++TSN] = p;
}

inline void getfail() {
	while(!q.empty()) q.pop();
	fail[0] = 0;
	for (int c=0; c<m; ++c) {
		int p = ch[0][c];
		if(p) {
			fail[p] = 0;
			q.push(p);
		}
	}
	while(!q.empty()) {
		int top=q.front(); q.pop();
		for (int c=0; c<m; ++c) {
			int p = ch[top][c];
			if(!p) {
				ch[top][c] = ch[fail[top]][c];
				continue;
			}
			q.push(p);
			int v = fail[top];
			while(v && !ch[v][c]) v = ch[v][c];
			fail[p] = ch[v][c];
			if(isend[fail[top]]) isend[top] = 1;
		}
	}
}

inline mat build() {
	mat ret; ret.init(sz+1, sz+1);
	for (int i=0; i<=sz; ++i) {
		if(isend[i])
			ret.a[i+1][i+1] = 1.0;
		else for (int c=0; c<m; ++c)
			ret.a[i+1][ch[i][c]+1] += pos[c];
	}
	return ret;
}

inline mat mul(mat a, mat b) {
	mat ret;
	ret.init(a.n, b.m);
	for (int i=1; i<=ret.n; ++i)
		for (int j=1; j<=ret.m; ++j) 
			for (int k=1; k<=a.m; ++k) 
				ret.a[i][j] = ret.a[i][j] + a.a[i][k]*b.a[k][j];
	return ret;
}

inline mat pwr(mat a, int b) {
	mat ret;
	ret.init(a.n, a.m);
	for (int i=1; i<=ret.n; ++i)
		ret.a[i][i] = 1;
	while(b) {
		if(b&1) ret = mul(ret, a);
		a = mul(a, a);
		b >>= 1;
	}
	return ret;
}

int main() {
	scanf("%d%d%d", &n, &l, &m);
	for (int i=0; i<m; ++i) {
		int p, q; scanf("%d%d", &p, &q);
		pos[i] = (ld)p/(ld)q;
	}
	for (int i=1; i<=n; ++i) {
		scanf("%s", str);
		insert();
	}
	getfail();
	mat ans=build();
	for (int i=1; i<=50; ++i) ans=mul(ans,ans);
	for (int i=1; i<=n; ++i) 
		printf("%.2lf\n", (double)ans.a[1][ps[i]+1]);
	return 0;
}
/*
3 2 2
1 2
1 2
AB
BA
AA
*/

 

 

Haryana Board Model 说:
2022年9月03日 14:09

Haryana Board Model Paper 2023 Pdf Download for Bhiwani Board (BSEH/HBSE) Sample Question Bank or Question Paper Design for Class 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 (Matric/Secondary), Higher Secondary (Class 11 & 12 Grade) Question Paper for Hindi Medium, English Medium, Urdu Medium and others for Theory, Objective (MCQ) and Bit Questions. Haryana Board Model Paper New Exam Scheme or Question Pattern for Sammittive Assignment Exams (SA1 & SA2): Very Long Answer (VLA), Long Answer (LA), Small Answer (SA), Very Small Answer (VSA), Single Answer, Multiple Choice and etc.


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter