A DNA molecule consists of two strands that wrap around each other to resemble a twisted ladder whose sides, made of sugar and phosphate molecules, are connected by rungs of nitrogen-containing chemicals called bases. Each strand is a linear arrangement of repeating similar units called nucleotides, each of which is composed of one sugar, one phosphate, and a nitrogenous base. Four different bases are present in DNA: adenine (A), thymine (T), cytosine (C), and guanine (G). The particular order of the bases arranged along the sugar-phosphate backbone is called the DNA sequence; the sequence specifies the exact genetic instructions required to create a particular organism with its own unique traits.
Geneticists often compare DNA strands and are interested in finding the longest common base sequence in the two strands. Note that these strands can be represented as strings consisting of the letters a, t, c and g. So, the longest common sequence in the two strands atgc and tga is tg. It is entirely possible that two different common sequences exist that are the same length and are the longest possible common sequences. For example, in the strands atgc and gctg, the longest common sequences are gc and tg.
Input and Output

Write a program that accepts as input two strings representing DNA strands, and prints as output the longest common sequence(s) in lexicographical order.
If there isn't any common sequence between the two strings, just print: 'No common sequence.'
If there is more than one test case, there must be a blank line between each two consecutive test cases, in both the input and the output files.
The strings are at most 300 characters long.
Sample Input
atgc
tga

atgc
gctg
Sample Output
tg

gc
tg
0ms
AC Code
// UVa 760 - DNA Sequencing.
//
// Reads pairs of whitespace-separated strings from standard input and, for
// each pair, prints every longest common substring (one per line, in
// lexicographical order), or "No common sequence." when the two strings share
// no character. A blank line is printed between the outputs of consecutive
// test cases.
//
// Approach: concatenate `first + separator + second + sentinel`, build the
// suffix array and the LCP ("height") array of the concatenation, and look at
// neighbouring suffixes that start in different input strings.

#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

namespace {

// Symbol layout of the concatenated text: every input byte b is stored as
// b + 1 (range 1..256), so 0 and 257 are free for the two unique markers.
constexpr int kSentinel = 0;        // terminates the text; smallest symbol
constexpr int kSeparator = 257;     // sits between the two input strings
constexpr int kAlphabetSize = 258;  // number of distinct symbol values

// Builds the suffix array of `s` by prefix doubling with counting sorts.
//
// @param s  text as integer symbols, each in [0, m)
// @param m  alphabet size (exclusive upper bound on the symbols of `s`)
// @return   sa, where sa[r] is the start index of the r-th smallest suffix
std::vector<int> build_sa(const std::vector<int>& s, int m) {
    const int n = static_cast<int>(s.size());
    std::vector<int> sa(n);
    if (n == 0) {
        return sa;
    }

    std::vector<int> x(n);                  // current rank of each suffix
    std::vector<int> y(n);                  // scratch: order by second key
    std::vector<int> cnt(std::max(n, m));   // counting-sort buckets

    // Initial pass: sort suffixes by their first symbol.
    std::fill(cnt.begin(), cnt.begin() + m, 0);
    for (int i = 0; i < n; ++i) {
        x[i] = s[i];
        ++cnt[x[i]];
    }
    for (int i = 1; i < m; ++i) {
        cnt[i] += cnt[i - 1];
    }
    for (int i = n - 1; i >= 0; --i) {
        sa[--cnt[x[i]]] = i;
    }

    for (int j = 1; j <= n; j <<= 1) {
        // Order suffixes by the second half of the 2j-prefix. Suffixes
        // shorter than j have an empty second half and therefore come first.
        int p = 0;
        for (int i = n - j; i < n; ++i) {
            y[p++] = i;
        }
        for (int i = 0; i < n; ++i) {
            if (sa[i] >= j) {
                y[p++] = sa[i] - j;
            }
        }

        // Stable counting sort by the first half (the current rank).
        std::fill(cnt.begin(), cnt.begin() + m, 0);
        for (int i = 0; i < n; ++i) {
            ++cnt[x[y[i]]];
        }
        for (int i = 1; i < m; ++i) {
            cnt[i] += cnt[i - 1];
        }
        for (int i = n - 1; i >= 0; --i) {
            sa[--cnt[x[y[i]]]] = y[i];
        }

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);  // y now holds the previous ranks
        const auto prev_rank = [&](int pos) { return pos < n ? y[pos] : -1; };
        p = 1;
        x[sa[0]] = 0;
        for (int i = 1; i < n; ++i) {
            const bool same = y[sa[i - 1]] == y[sa[i]] &&
                              prev_rank(sa[i - 1] + j) == prev_rank(sa[i] + j);
            x[sa[i]] = same ? p - 1 : p++;
        }
        if (p >= n) {
            break;  // all ranks are distinct: the order is final
        }
        m = p;
    }
    return sa;
}

// Computes the LCP array for `s` given its suffix array.
//
// @param s   the text used to build `sa`
// @param sa  suffix array of `s`
// @return    height, where height[r] (r >= 1) is the length of the longest
//            common prefix of the suffixes sa[r - 1] and sa[r]; height[0] = 0
std::vector<int> getHeight(const std::vector<int>& s, const std::vector<int>& sa) {
    const int n = static_cast<int>(s.size());
    std::vector<int> rank_of(n);
    std::vector<int> height(n, 0);
    for (int r = 0; r < n; ++r) {
        rank_of[sa[r]] = r;
    }

    int k = 0;  // LCP carried over from the previous text position
    for (int i = 0; i < n; ++i) {
        if (rank_of[i] == 0) {
            k = 0;  // the smallest suffix has no predecessor
            continue;
        }
        if (k > 0) {
            --k;
        }
        const int j = sa[rank_of[i] - 1];
        while (i + k < n && j + k < n && s[i + k] == s[j + k]) {
            ++k;
        }
        height[rank_of[i]] = k;
    }
    return height;
}

// Returns all distinct longest common substrings of `first` and `second`,
// sorted lexicographically; the result is empty when they share no character.
std::vector<std::string> longest_common_substrings(const std::string& first,
                                                   const std::string& second) {
    const int len1 = static_cast<int>(first.size());

    std::vector<int> text;
    text.reserve(first.size() + second.size() + 2);
    for (const unsigned char ch : first) {
        text.push_back(ch + 1);
    }
    text.push_back(kSeparator);
    for (const unsigned char ch : second) {
        text.push_back(ch + 1);
    }
    text.push_back(kSentinel);
    const int n = static_cast<int>(text.size());

    const std::vector<int> sa = build_sa(text, kAlphabetSize);
    const std::vector<int> height = getHeight(text, sa);

    // 1 = suffix starts inside `first`, 2 = inside `second`,
    // 0 = starts at the separator or at the sentinel.
    const auto origin = [&](int pos) {
        if (pos < len1) {
            return 1;
        }
        if (pos > len1 && pos < n - 1) {
            return 2;
        }
        return 0;
    };

    // The longest common substring length is the largest LCP between two
    // suffix-array neighbours that come from different input strings.
    int best = 0;
    for (int r = 1; r < n; ++r) {
        const int a = origin(sa[r - 1]);
        const int b = origin(sa[r]);
        if (a != 0 && b != 0 && a != b) {
            best = std::max(best, height[r]);
        }
    }

    std::vector<std::string> result;
    if (best == 0) {
        return result;
    }

    // Each maximal run of height >= best groups suffixes that share one
    // prefix of length `best`. Runs appear in suffix order, so emitting one
    // string per run that touches both inputs yields sorted, distinct output.
    for (int r = 1; r < n;) {
        if (height[r] < best) {
            ++r;
            continue;
        }
        int end = r;
        while (end < n && height[end] >= best) {
            ++end;
        }
        bool in_first = false;
        bool in_second = false;
        int start_in_first = -1;
        for (int t = r - 1; t < end; ++t) {
            const int o = origin(sa[t]);
            if (o == 1) {
                in_first = true;
                start_in_first = sa[t];
            } else if (o == 2) {
                in_second = true;
            }
        }
        if (in_first && in_second) {
            result.push_back(first.substr(start_in_first, best));
        }
        r = end;
    }
    return result;
}

}  // namespace

int main() {
    std::string first;
    std::string second;
    bool first_case = true;
    // operator>> skips whitespace, so blank lines between test cases in the
    // input are consumed automatically.
    while (std::cin >> first >> second) {
        if (!first_case) {
            std::cout << '\n';  // blank line between consecutive outputs
        }
        first_case = false;

        const std::vector<std::string> answers =
            longest_common_substrings(first, second);
        if (answers.empty()) {
            std::cout << "No common sequence.\n";
            continue;
        }
        for (const std::string& answer : answers) {
            std::cout << answer << '\n';
        }
    }
    return 0;
}
Copyright notice: This is an original article by the blog author and may not be reproduced without the author's permission.
UVa Problem 760 - DNA Sequencing (suffix array for the longest common substring of two strings, output in lexicographical order)