Блог пользователя Normie28

Автор Normie28, история, 5 лет назад, По-английски

/* Hi. This is my current implementation for Aho-Corasick suffix automaton. The example below is for problem https://open.kattis.com/problems/stringmultimatching However, I couldn't get it working, as I always get Wrong Answer when submitting with this code. Can you help me debug this code? Thanks. */ #include <bits/stdc++.h> using namespace std; #include <ext/pb_ds/assoc_container.hpp> #include <ext/pb_ds/tree_policy.hpp> using namespace __gnu_pbds; #define rep(i,n) for(int64_t i=0;i < (int64_t)(n);i++) #pragma comment(linker, "/stack:200000000") #pragma GCC optimize("Ofast") #pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native") #define FILE_IN "birds.inp" #define FILE_OUT "birds.out" #define ofile freopen(FILE_IN,"r",stdin);freopen(FILE_OUT,"w",stdout) #define fio ios::sync_with_stdio(0);cin.tie(0);cout.tie(0) #define nfio cin.tie(0);cout.tie(0) #define max(x,y) (((x)>(y))?(x):(y)) #define min(x,y) (((x)<(y))?(x):(y)) #define ord(a,b,c) ((a>=b)and(b>=c)) #define MOD (ll(1000000007)) #define MAX 300001 #define mag 320 #define p1 first #define p2 second.first #define p3 second.second #define fi first #define se second #define pow2(x) (ll(1)<<x) #define pii pair<int,int> #define piii pair<int,pii> #define For(i,__,___) for(int i=__;i<=___;i++) #define Rep(i,__,___) for(int i=__;i>=___;i--) #define ordered_set tree<long long,null_type,less<long long>,rb_tree_tag,tree_order_statistics_node_update> #define endl "\n" #define bi BigInt #define pi 3.1415926535897 typedef long long ll; struct node // Structure representing a node. { int parent,slink,trans[128]; /* parent: id of node's parent. 0 if this node is the root. slink: id of node's direct suffix link. 1 if this node is the root. trans[i]: The next node's id if the next character in the automaton is i. Will be found recursively using slink negative: use suffix link positive: use trie link 0: not set -1 if this node is the root and there's no trie link with this character. */ int pedge; /* pedge: character of the edge connecting parent of node to node */ vector<int> leaf; /* leaf: list of ids of strings that are suffixes of the string represented by current node. Will be found recursively using slink */ node(int par=0 ,int edge=0) // Constructor { for (int i=0;i<128;i++) trans[i]=0; parent=par; slink=0; pedge=edge; leaf={}; } }; struct aho_cora // Structure representing a suffix automaton. { private: vector<node> trie; // A vector of nodes representing the trie. int scnt=0; // Count of strings added to the trie. Will be used to number the strings added to the trie. int add_node (int par, int last) /* Add a node into the tree with parent par and edge character last. */ { trie.push_back(node(par,last)); int cur=trie.size()-1; return cur; } void refresh_node(int cur) /* Add the suffix links and transition table for node cur, then calls the function for its children. */ { if (trie[cur].parent<=1) trie[cur].slink=1; // If string represented by node has <=1 character, then it has no suffixes, so its direct suffix link is 1. else trie[cur].slink=abs(trie[trie[trie[cur].parent].slink].trans[trie[cur].pedge]); /* "to find a suffix link for some vertex v, then we can go to the ancestor p of the current vertex (let c be the letter of the edge from p to v), then follow its suffix link, and perform from there the transition with the letter c." */ for (int i=0;i<128;i++) if (trie[cur].trans[i]<=0) trie[cur].trans[i]=-abs(trie[trie[cur].slink].trans[i]); /* For transitions not using trie links, import the transition results from the node's suffix link node. */ for (int g: trie[trie[cur].slink].leaf) trie[cur].leaf.push_back(g); /* Adds the suffix link node's leaf vector to the node's leaf vector. */ for (int i=0;i<128;i++) if (trie[cur].trans[i]>0) refresh_node(trie[cur].trans[i]); /* Calls refresh_node() for its children. */ } public: aho_cora() // Constructor { trie.push_back(node(0)); trie.push_back(node(0)); trie[1].parent=0; trie[1].slink=1; trie[1].leaf={}; for (int i=0;i<128;i++) trie[1].trans[i]=-1; } void add_string (string s) //Adds a string to the trie. { scnt++; //Increments the string counter. This string's id is now scnt. int cur=1,n=s.length(); //Starts at node 1 for (int i=0;i<n;i++) { if (trie[cur].trans[s[i]]<=0) //If there's no trie link with this character yet, create a new one (along with a new node) { int ass=add_node(cur,s[i]); trie[cur].trans[s[i]]=ass; } cur=trie[cur].trans[s[i]]; //Transitions to next node via the trie link for the current character. } trie[cur].leaf.push_back(scnt); //Once we've arrived at the node for the string, add the string id to its leaf vector. } vector<pii> check_string(string s) /* Returns all occurences of pattern strings in the given text, in the form of a vector of pairs. First element is the end position of the matching pattern string (relative to the text). Second element is the id of the matching patten string (pattern strings are numbered from 1 to n, based on when you added them) */ { vector<pii> res; int n=s.length(); int cur=1; // Starts at node 1 for (int i=0;i<n;i++) { cur=abs(trie[cur].trans[s[i]]); // Transitions to next node based on the current character. for (int g: trie[cur].leaf) { res.push_back({i,g}); //Adds the node's leaf array to the results array. } } return res; } void check_graph() // Debug { for (int i=1;i<trie.size();i++) { cout<<i<<' '<<trie[i].parent<<' '<<trie[i].slink<<' '<<endl; for (int j=0;j<128;j++) cout<<trie[i].trans[j]<<' '; cout<<endl; for (int g: trie[i].leaf) cout<<g<<' '; cout<<endl; } } void refresh() // Refreshes the tree; see refresh_node() { refresh_node(1); } }; ll n,m,k,c[501],t,t1,i,j,res; vector<ll> gt[51]; int main() { while(1) { n=0; cin>>n; cin.ignore(); aho_cora graph; if (!n) break; int len[n]; for (i=0;i<n;i++) { string s; getline(cin,s); len[i]=s.length(); graph.add_string(s); } graph.refresh(); string s; getline(cin,s); vector<int> match[100001]; vector<pii> res=graph.check_string(s); for (i=0;i<res.size();i++) match[res[i].se].push_back(res[i].fi); for (i=0;i<n;i++) { for (int g: match[i+1]) cout<<g-len[i]+1<<' '; cout<<endl; } } }
  • Проголосовать: нравится
  • -12
  • Проголосовать: не нравится

»
5 лет назад, # |
  Проголосовать: нравится +12 Проголосовать: не нравится

You need to calculate the links in bfs order, rather than dfs to ensure you don't use links from nodes not yet processed.
AC with bfs (modified the refresh method).

  • »
    »
    5 лет назад, # ^ |
      Проголосовать: нравится 0 Проголосовать: не нравится

    Thank you! It was an oversight on my part. I initially thought you could get the transition table just from the node's parent, without also considering the node's suffix link.

»
5 лет назад, # |
  Проголосовать: нравится +1 Проголосовать: не нравится

big pp