Description
Beside other services, ACM helps companies to clearly state their "corporate identity", which includes company logos but also other signs, like trademarks. One of such companies is Internet Building Masters (IBM), which has recently asked ACM for help with their new identity. IBM does not want to change their existing logos and trademarks completely, because their customers are used to the old ones. Therefore, ACM will only change existing trademarks instead of creating new ones.
After several other proposals, it was decided to take all existing trademarks and find the longest common sequence of letters that is contained in all of them. This sequence will be graphically emphasized to form a new logo. Then, the old trademarks may still be used while showing the new identity.
Your task is to find such a sequence.
Input
The input contains several tasks. Each task begins with a line containing a positive integer N, the number of trademarks (2 ≤ N ≤ 4000). The number is followed by N lines, each containing one trademark. Trademarks will be composed only of lowercase letters; the length of each trademark will be at least 1 and at most 200 characters.
After the last trademark, the next task begins. The last task is followed by a line containing zero.
Output
For each task, output a single line containing the longest string contained as a substring in all trademarks. If there are several strings of the same length, print the one that is lexicographically smallest. If there is no such non-empty string, output the words "IDENTITY LOST" instead.
Sample Input
3
aabbaabb
abbababb
bbbbbabb
2
xyz
abc
0
Sample Output
abb
IDENTITY LOST
Problem summary: given several strings, find their longest common substring; if there are several of that length, output the lexicographically smallest one.
There are two ways of doing this:
(1) Use KMP. Every substring is a prefix of some suffix. Enumerate each suffix suff(i) of the shortest string and run a KMP match of suff(i) against every other string; this yields pre_suff(i), the longest prefix of suff(i) that occurs in all strings. The longest pre_suff(i) is the answer. If the suffixes are enumerated in lexicographic order, there is no need to compare pre_suff values of the same length: the first one found with the maximum length is already the smallest.
(2) Use a suffix array. First concatenate all the strings into one long string and compute the height array. Then binary-search the length mid of the longest common substring: split the suffix array into consecutive intervals in which every height value is at least mid, and check each interval. A common substring of length mid exists as long as the suffixes in some interval come from all of the strings. However, my implementation of this approach kept getting TLE (Time Limit Exceeded).
The code is as follows (the first approach):
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>

// POJ 3450 "Corporate Identity": longest substring common to all trademarks,
// lexicographically smallest on ties.
//
// Approach: every substring is a prefix of some suffix.  The suffixes of the
// shortest trademark are visited in lexicographic order (via a suffix array);
// for each one, KMP finds the longest prefix that occurs in every trademark.
// Because of the visiting order, the first suffix that reaches the maximum
// length already yields the lexicographically smallest answer.

const int MAXN = 4005;   // capacity for trademarks per task (N <= 4000)
const int MAXLEN = 205;  // capacity for one trademark (<= 200 chars + NUL)

int sa[MAXLEN];   // suffix array of the shortest trademark
int tsa[MAXLEN];  // scratch buffer: second-key order / previous ranks
int rk[MAXLEN];   // scratch buffer: current ranks
int cnt[MAXLEN];  // counting-sort buckets

char tdmks[MAXN][MAXLEN];  // trademarks of the current task
int nxt[MAXLEN];           // KMP failure table of the current pattern

// Returns true when suffixes i and j have equal rank pairs (y[i], y[i+k]) and
// (y[j], y[j+k]); a suffix shorter than k+1 has no second key and never equals
// one that does.  n is the length of the string the ranks belong to.
bool issame(int* y, int i, int j, int k, int n) {
    if (y[i] != y[j]) return false;
    const bool i_has_second = i + k < n;
    const bool j_has_second = j + k < n;
    if (i_has_second != j_has_second) return false;
    // Never index past n when neither suffix has a second key.
    return !i_has_second || y[i + k] == y[j + k];
}

// Builds the suffix array of str[0..n) (lowercase letters only) into sa[]
// using prefix doubling with counting sort.
void buildSA(char* str, int n) {
    if (n <= 0) return;
    int* x = rk;
    int* y = tsa;
    int m = 26;  // number of distinct keys in the current round

    // Round 0: sort by first character.
    for (int i = 0; i < m; ++i) cnt[i] = 0;
    for (int i = 0; i < n; ++i) ++cnt[x[i] = str[i] - 'a'];
    for (int i = 1; i < m; ++i) cnt[i] += cnt[i - 1];
    for (int i = n - 1; i >= 0; --i) sa[--cnt[x[i]]] = i;

    for (int k = 1; k <= n; k <<= 1) {
        // y[] = suffixes ordered by their second key (rank of suffix i+k);
        // suffixes without a second key come first.
        int p = 0;
        for (int i = n - k; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] >= k) y[p++] = sa[i] - k;

        // Stable counting sort by first key.
        for (int i = 0; i < m; ++i) cnt[i] = 0;
        for (int i = 0; i < n; ++i) ++cnt[x[y[i]]];
        for (int i = 1; i < m; ++i) cnt[i] += cnt[i - 1];
        for (int i = n - 1; i >= 0; --i) sa[--cnt[x[y[i]]]] = y[i];

        // Recompute ranks for prefixes of length 2k; y now holds the old ranks.
        p = 1;
        std::swap(x, y);
        x[sa[0]] = 0;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = issame(y, sa[i], sa[i - 1], k, n) ? p - 1 : p++;
        if (p >= n) break;  // all ranks distinct: order is final
        m = p;
    }
}

// Fills nxt[0..str_len] with the KMP failure table of str[0..str_len).
void getNext(char* str, int str_len) {
    nxt[0] = nxt[1] = 0;
    for (int i = 1; i < str_len; ++i) {
        int j = nxt[i];
        while (j && str[i] != str[j]) j = nxt[j];
        nxt[i + 1] = (str[i] == str[j]) ? j + 1 : 0;
    }
}

// Returns the length of the longest prefix of str[0..str_len) that occurs as
// a substring of the NUL-terminated ptr.  Requires nxt[] to have been filled
// by getNext(str, str_len).
int match(char* str, int str_len, char* ptr) {
    if (str_len <= 0) return 0;
    const int ptr_len = static_cast<int>(std::strlen(ptr));
    int best = 0;
    int k = 0;  // number of pattern characters currently matched
    for (int i = 0; i < ptr_len; ++i) {
        // Fall back on mismatch, and also after a full match so that str[k]
        // is never read past the pattern.
        while (k && (k == str_len || ptr[i] != str[k])) k = nxt[k];
        if (str[k] == ptr[i]) {
            ++k;
            best = std::max(best, k);
        }
    }
    return best;
}

// Returns the length of the longest prefix of str[0..str_len) that occurs in
// every one of the first cnt_tdmks trademarks.
int getLongestPre(char* str, int str_len, int cnt_tdmks) {
    getNext(str, str_len);
    int long_pre = str_len;
    for (int i = 0; i < cnt_tdmks && long_pre > 0; ++i) {
        long_pre = std::min(match(str, str_len, tdmks[i]), long_pre);
    }
    return long_pre;
}

// Reads cnt_tdmks trademarks from stdin into tdmks[] and returns the index of
// the (first) shortest one.
int input(int cnt_tdmks) {
    int minlen = MAXLEN;
    int id_minlen = 0;
    for (int i = 0; i < cnt_tdmks; ++i) {
        // Width-limited read so an over-long token cannot overflow the row.
        if (std::scanf("%204s", tdmks[i]) != 1) tdmks[i][0] = '\0';
        const int len = static_cast<int>(std::strlen(tdmks[i]));
        if (len < minlen) {
            minlen = len;
            id_minlen = i;
        }
    }
    return id_minlen;
}

// Computes the answer for the current task using trademark p as the source of
// candidate substrings.  Returns the answer length (0 if none) and stores its
// start offset inside tdmks[p] in start.
static int longestCommon(int p, int cnt_tdmks, int& start) {
    const int m = static_cast<int>(std::strlen(tdmks[p]));
    buildSA(tdmks[p], m);
    int ans_len = 0;
    start = 0;
    for (int i = 0; i < m; ++i) {
        const int suffix_len = m - sa[i];
        if (suffix_len <= ans_len) continue;  // cannot beat the current best
        const int len = getLongestPre(tdmks[p] + sa[i], suffix_len, cnt_tdmks);
        // Strict '>' keeps the lexicographically smallest among equal lengths.
        if (len > ans_len) {
            ans_len = len;
            start = sa[i];
        }
    }
    return ans_len;
}

// Solves one task and prints the answer line; p is the index of the shortest
// trademark, cnt_tdmks the number of trademarks.
void solve(int p, int cnt_tdmks) {
    int ans_p = 0;
    const int ans_len = longestCommon(p, cnt_tdmks, ans_p);
    if (ans_len) {
        std::printf("%.*s\n", ans_len, tdmks[p] + ans_p);
    } else {
        std::printf("IDENTITY LOST\n");
    }
}

// Define POJ3450_NO_MAIN to compile the solver without its entry point, e.g.
// when linking it into a test driver that supplies its own main().
#ifndef POJ3450_NO_MAIN
int main() {
    // freopen("in.txt", "r", stdin);
    int n;
    // Stop on the terminating 0 as well as on EOF / malformed input.
    while (std::scanf("%d", &n) == 1 && n > 0) {
        if (n > MAXN) return 1;  // would not fit in tdmks[]
        solve(input(n), n);
    }
    return 0;
}
#endif
POJ-3450 Corporate Identity (kmp+ suffix array)