Template — based on Liu Rujia's Training Guide
/*===============================*\
  Aho-Corasick automaton template,
  written following the training guide.
\*===============================*/
#include <cstring>
#include <queue>
#include <cstdio>
#include <map>
#include <string>
using namespace std;

// Alphabet size; idx() must map every input character into [0, sigma_size).
const int sigma_size = 26;
// Maximum number of trie nodes (root included). insert() does not check this
// bound, so the total length of all patterns must stay below it.
const int maxnode = 11000;
// Size of the per-pattern counter array; every pattern id v passed to
// insert() must satisfy 0 < v < maxs.
// NOTE(review): the numeric bound was lost in the source text (it read
// `max + 10`); 150 is assumed here -- set it to the maximum number of
// patterns for your problem.
const int maxs = 150 + 10;

// Maps each inserted pattern to its id, so that ids counted in cnt[] can be
// translated back to words (e.g. for printing).
map<string, int> ms;

// Aho-Corasick automaton over a fixed alphabet, stored in flat arrays.
//
// Usage: init(); insert() every pattern; getFail(); then find() on each text.
// Call clear() between texts if the counts must not accumulate.
struct AhoCorasickAutomata {
    int ch[maxnode][sigma_size];  // trie edges; 0 means "no edge" (0 is also the root)
    int f[maxnode];               // fail function
    int val[maxnode];             // non-zero pattern id on the end node of each pattern
    int last[maxnode];            // next node of the output list (suffix link)
    int cnt[maxs];                // cnt[v] = number of occurrences found for pattern id v
    int sz;                       // number of nodes currently in use

    // Resets the automaton to a lone root node and clears cnt[] and ms.
    void init() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
        // Reset the root's bookkeeping too, so a re-used or non-static
        // instance does not see stale values from an earlier run.
        val[0] = 0;
        last[0] = 0;
        f[0] = 0;
        memset(cnt, 0, sizeof(cnt));
        ms.clear();
    }

    // Zeroes the occurrence counters. When more than one text is searched,
    // this usually has to be called before each find().
    void clear() {
        memset(cnt, 0, sizeof(cnt));
    }

    // Index of character c within the alphabet.
    // Be careful: this assumes lowercase 'a'..'z'. If the problem gives no
    // character range, return c directly instead (and enlarge sigma_size) --
    // but note that a plain char may be negative.
    int idx(char c) const {
        return c - 'a';
    }

    // Inserts pattern s with id v. v must be non-zero (0 marks "no pattern
    // ends here") and below maxs, because it is used as an index into cnt[].
    // Giving every word a distinct v lets cnt[] record which words appeared
    // and how many times.
    void insert(const char* s, int v) {
        int u = 0;
        const int n = static_cast<int>(strlen(s));
        for (int i = 0; i < n; i++) {
            const int c = idx(s[i]);
            if (!ch[u][c]) {
                // Allocate a fresh node and wire it in.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];  // ch[u][c] holds the index of the next node
        }
        val[u] = v;
        ms[string(s)] = v;
    }

    // Walks the output list starting at node j, incrementing cnt[] for each
    // pattern on it, and returns how many patterns were counted.
    int record(int j) {
        int hits = 0;
        for (; j; j = last[j]) {
            cnt[val[j]]++;  // val[j] is the id of the word ending at node j
            ++hits;
        }
        return hits;
    }

    // Counts every pattern that ends at node j or at a node reachable from j
    // through the output list. ms can be used to turn the ids into words.
    void print(int j) {
        record(j);
    }

    // Searches text T for all inserted patterns, updating cnt[].
    // getFail() must have been called after the last insert().
    // Returns the total number of pattern occurrences recorded by this call.
    int find(const char* T) {
        const int n = static_cast<int>(strlen(T));
        int matches = 0;
        int j = 0;  // current node, initially the root
        for (int i = 0; i < n; i++) {  // i is the current position in the text
            const int c = idx(T[i]);
            // Follow fail links until the character can be matched.
            while (j && !ch[j][c]) j = f[j];
            j = ch[j][c];
            if (val[j]) {
                matches += record(j);        // a word ends exactly here
            } else if (last[j]) {
                matches += record(last[j]);  // a shorter word ends here
            }
        }
        return matches;
    }

    // Computes the fail function f[] and the output links last[].
    void getFail() {
        queue<int> q;
        f[0] = 0;
        // Initialise the queue with the root's children. A mismatch on the
        // first character restarts matching from scratch, so their fail
        // links point to the root (a fail link to the root means that no
        // proper suffix matches).
        for (int c = 0; c < sigma_size; c++) {
            const int u = ch[0][c];
            if (u) {
                f[u] = 0;
                q.push(u);
                last[u] = 0;
            }
        }
        // Compute the fail links in BFS order.
        while (!q.empty()) {
            const int r = q.front();
            q.pop();
            for (int c = 0; c < sigma_size; c++) {
                const int u = ch[r][c];
                if (!u) continue;
                q.push(u);
                int v = f[r];
                // ch[v][c] == 0 means v has no edge for this character, so
                // keep walking along the fail function.
                while (v && !ch[v][c]) v = f[v];
                f[u] = ch[v][c];
                // last[u]: the nearest word-end node reached from u through
                // fail links; after a word is found it is used to check
                // whether other patterns end at the same position.
                last[u] = val[f[u]] ? f[u] : last[f[u]];
            }
        }
    }
};

AhoCorasickAutomata ac;
1. Check the range of characters used in the problem, and adjust sigma_size and the idx function accordingly.
Copyright notice: This is an original blog article and may not be reproduced without the author's consent.
AC automaton (Aho–Corasick automaton) summary