Finding the longest common substring in C

Source: Internet
Author: User
Tags first string terminates
The definition of a substring is similar to that of a subsequence, but a substring must occupy consecutive positions in the other string. For example, for the two input strings BDCABA and ABCBDAB the longest common substrings are BD and AB, both of length 2.
There are two ways to solve the longest common substring problem; here is how I approach each of them.
Method One:
There is a difference between longest Common substring and longest Common subsequence.
X = <a, b, c, f, b, c>
Y = <a, b, f, c, a, b>
The longest common subsequence of X and Y is <a, b, c, b>, of length 4
The longest common substring of X and Y is <a, b>, of length 2
In fact, the substring problem is a special case of the subsequence problem: we still look for two increasing index sequences
<i1, i2, ..., ik> and <j1, j2, ..., jk> such that
X[i1] == Y[j1]
X[i2] == Y[j2]
......
X[ik] == Y[jk]
Unlike the subsequence problem, the substring problem requires not only that the index sequences be increasing, but also that each
step increases by exactly 1; that is, the two index sequences are:
<i, i+1, i+2, ..., i+k-1> and <j, j+1, j+2, ..., j+k-1>
By analogy with the dynamic programming solution of the subsequence problem, the substring problem can also be solved by dynamic programming. Let
c[i][j] denote the length of the longest common substring that ends at X[i] and Y[j]. For example:
X = <y, e, d, f>
Y = <y, e, k, f>
c[1][1] = 1
c[2][2] = 2
c[3][3] = 0
c[4][4] = 1
The state transition equation is:
If X[i] == Y[j], then c[i][j] = c[i-1][j-1] + 1
If X[i] != Y[j], then c[i][j] = 0
Finally, the length of the longest common substring is
max{c[i][j], 1<=i<=n, 1<=j<=m}
The complete code is as follows:
Copy Code code as follows:

/**
Find the length of the longest common continuous substring of two strings
* * Author:liuzhiwei
* * data:2011-08-16
**/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Find the longest common contiguous substring of two strings using
 * dynamic programming, print it, and return its length.
 *
 * run[j] holds the length of the common run that ends at str1[i-1] and
 * str2[j-1] for the row i currently being processed. Only one row is kept,
 * because each cell depends solely on the upper-left neighbour.
 *
 * Prints "Longest common substring is:" followed by the substring and a
 * newline (an empty substring when the strings share no character). When
 * several substrings tie for the maximum length, the one that ends first
 * in str1 is printed.
 *
 * @param str1 first NUL-terminated string
 * @param str2 second NUL-terminated string
 * @return length of the longest common substring (0 if none), or -1 if the
 *         working row could not be allocated (nothing is printed then)
 */
int longest_common_substring(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    int best = 0;          /* longest run seen so far */
    size_t best_end = 0;   /* one past the last character of that run in str1 */

    /* calloc zero-fills, which provides the "column 0 / row 0 are 0" base case. */
    int *run = calloc(len2 + 1, sizeof *run);
    if (run == NULL) {
        return -1;
    }

    for (size_t i = 1; i <= len1; i++) {
        /*
         * Walk j downwards so that run[j - 1] still holds the value of the
         * previous row (the upper-left neighbour) when run[j] is updated.
         */
        for (size_t j = len2; j >= 1; j--) {
            if (str1[i - 1] == str2[j - 1]) {
                run[j] = run[j - 1] + 1;
                if (run[j] > best) {
                    best = run[j];
                    best_end = i;
                }
            } else {
                /* A mismatch breaks the contiguous run. */
                run[j] = 0;
            }
        }
    }

    free(run);

    /* The best run occupies str1[best_end - best .. best_end - 1]. */
    printf("Longest common substring is:%.*s\n", best,
           str1 + (best_end - (size_t)best));
    return best;
}
/**
 * Read two lines from standard input and report their longest common
 * contiguous substring and its length.
 *
 * @return 0 on success, 1 if either line could not be read
 */
int main(void)
{
    char str1[1000], str2[1000];

    printf("Please enter the first string:");
    /* fgets bounds the read to the buffer size; gets() cannot and was removed in C11. */
    if (fgets(str1, sizeof str1, stdin) == NULL) {
        return 1;
    }
    str1[strcspn(str1, "\n")] = '\0';   /* drop the newline fgets keeps */

    printf("Please enter a second string:");
    if (fgets(str2, sizeof str2, stdin) == NULL) {
        return 1;
    }
    str2[strcspn(str2, "\n")] = '\0';

    int len = longest_common_substring(str1, str2);
    printf("The length of the longest public continuous substring is:%d\n", len);
    return 0;
}

The effect chart is as follows:

Method Two:
Write the strings s1 and s2 on two rulers (I will keep calling the rulers s1 and s2). Hold s1 fixed, align the head of s2 with the tail of s1, and then slide s2 one position at a time toward the head of s1. At each position, compute the length of the longest common substring within the overlapping part of the two strings. Continue until the tail of s2 reaches the head of s1. The maximum length obtained during this process is the length of the longest common substring of s1 and s2.
The figure below illustrates the process (the figure contains a small error: s2 should be moving from right to left). The blue section marks the overlapping part of the strings, and the red part marks the common substring within the overlap.
Here s1 = "shaohui" and s2 = "ahui", and the final result is 3.

The complete code is as follows:
Copy Code code as follows:

/**
Find the length of the longest common continuous substring of two strings
* * Author:liuzhiwei
* * data:2011-08-16
**/
#include "stdio.h"
#include "string.h"
#include "Stdlib.h"
/**
 * Return the length of the longest common contiguous substring of two
 * strings using the "sliding ruler" method.
 *
 * str2 is slid across str1 one position at a time. For every alignment the
 * overlapping characters are compared pairwise and the longest run of
 * consecutive equal characters is recorded.
 *
 * @param str1 first NUL-terminated string
 * @param str2 second NUL-terminated string
 * @return length of the longest common substring (0 if there is none)
 */
int longest_common_substring(const char *str1, const char *str2)
{
    int len1 = (int)strlen(str1);
    int len2 = (int)strlen(str2);
    int total = len1 + len2;
    int max = 0;

    for (int shift = 0; shift < total; shift++) {
        /*
         * For shift < len1 the head of str2 sits at str1[len1 - shift];
         * afterwards the head of str1 sits at str2[shift - len1].
         */
        int s1_start = 0;
        int s2_start = 0;
        if (shift < len1) {
            s1_start = len1 - shift;
        } else {
            s2_start = shift - len1;
        }

        int curmax = 0;   /* length of the run of equal characters in progress */
        for (int idx = 0;
             s1_start + idx < len1 && s2_start + idx < len2;
             idx++) {
            if (str1[s1_start + idx] == str2[s2_start + idx]) {
                curmax++;
                if (curmax > max) {
                    max = curmax;
                }
            } else {
                /* A mismatch breaks the contiguous run. */
                curmax = 0;
            }
        }
    }
    return max;
}
/**
 * Read two lines from standard input and report the length of their
 * longest common contiguous substring.
 *
 * @return 0 on success, 1 if either line could not be read
 */
int main(void)
{
    char str1[1000], str2[1000];

    printf("Please enter the first string:");
    /* fgets bounds the read to the buffer size; gets() cannot and was removed in C11. */
    if (fgets(str1, sizeof str1, stdin) == NULL) {
        return 1;
    }
    str1[strcspn(str1, "\n")] = '\0';   /* drop the newline fgets keeps */

    printf("Please enter a second string:");
    if (fgets(str2, sizeof str2, stdin) == NULL) {
        return 1;
    }
    str2[strcspn(str2, "\n")] = '\0';

    int len = longest_common_substring(str1, str2);
    printf("The length of the longest public continuous substring is:%d\n", len);
    return 0;
}

The effect chart is as follows:

With a small change the common substring itself can be printed as well: record, for the longest run of matching characters, the index in one of the strings of the run's last character:
Copy Code code as follows:

/**
Find the length of the longest common continuous substring of two strings
* * Author:liuzhiwei
* * data:2011-08-16
**/
#include "stdio.h"
#include "string.h"
#include "Stdlib.h"
/**
 * Find the longest common contiguous substring of two strings using the
 * "sliding ruler" method, print it, and return its length.
 *
 * str2 is slid across str1 one position at a time. For every alignment the
 * overlapping characters are compared pairwise. Whenever the current run of
 * equal characters becomes longer than any run seen before, its length and
 * its starting index in str1 are recorded.
 *
 * Prints "Longest common substring is:" followed by the substring and a
 * newline (an empty substring when the strings share no character).
 *
 * @param str1 first NUL-terminated string
 * @param str2 second NUL-terminated string
 * @return length of the longest common substring (0 if there is none)
 */
int longest_common_substring(const char *str1, const char *str2)
{
    int len1 = (int)strlen(str1);
    int len2 = (int)strlen(str2);
    int total = len1 + len2;
    int max = 0;
    int best_start = 0;   /* index in str1 where the best run begins */

    for (int shift = 0; shift < total; shift++) {
        /*
         * For shift < len1 the head of str2 sits at str1[len1 - shift];
         * afterwards the head of str1 sits at str2[shift - len1].
         */
        int s1_start = 0;
        int s2_start = 0;
        if (shift < len1) {
            s1_start = len1 - shift;
        } else {
            s2_start = shift - len1;
        }

        int curmax = 0;   /* length of the run of equal characters in progress */
        for (int idx = 0;
             s1_start + idx < len1 && s2_start + idx < len2;
             idx++) {
            if (str1[s1_start + idx] == str2[s2_start + idx]) {
                curmax++;
                /*
                 * Record the position at the moment the record is broken,
                 * so the saved index always belongs to the saved length.
                 */
                if (curmax > max) {
                    max = curmax;
                    best_start = s1_start + idx - curmax + 1;
                }
            } else {
                /* A mismatch breaks the contiguous run. */
                curmax = 0;
            }
        }
    }

    /* Print straight from str1; no fixed-size scratch buffer to overflow. */
    printf("Longest common substring is:%.*s\n", max, str1 + best_start);
    return max;
}
/**
 * Read two lines from standard input and report their longest common
 * contiguous substring and its length.
 *
 * @return 0 on success, 1 if either line could not be read
 */
int main(void)
{
    char str1[1000], str2[1000];

    printf("Please enter the first string:");
    /* fgets bounds the read to the buffer size; gets() cannot and was removed in C11. */
    if (fgets(str1, sizeof str1, stdin) == NULL) {
        return 1;
    }
    str1[strcspn(str1, "\n")] = '\0';   /* drop the newline fgets keeps */

    printf("Please enter a second string:");
    if (fgets(str2, sizeof str2, stdin) == NULL) {
        return 1;
    }
    str2[strcspn(str2, "\n")] = '\0';

    int len = longest_common_substring(str1, str2);
    printf("The length of the longest public continuous substring is:%d\n", len);
    return 0;
}

The effect chart is as follows:

Extension: the substring may also appear reversed, as in HDOJ 1238.
The problem asks for the longest such substring.
Given a series of strings, find the longest substring that occurs in every one of them, either as written or reversed. For example:
rose
orchid
The longest substring here is "ro" (it appears reversed, as "or", in "orchid"), with length 2.
An exhaustive search over all the strings would take too long.
Instead, find the shortest of the strings and enumerate its substrings,
checking whether each one (or its reverse) is a substring of every other string, and keep the maximum length.
Copy Code code as follows:

/**
Find the length of the longest substring that occurs, as written or reversed, in every input string (HDOJ 1238)
* * Author:liuzhiwei
* * date:2011-08-16
**/
#include "stdio.h"
#include "string.h"
#include "Stdlib.h"
enum {
    MAX_STRINGS = 100,   /* number of rows in str */
    MAX_LENGTH = 100     /* longest string a row can hold, excluding the NUL */
};

/* Input strings of the current test case, one NUL-terminated string per row.
 * The extra byte leaves room for the terminator of a MAX_LENGTH-character
 * string. */
char str[MAX_STRINGS][MAX_LENGTH + 1];

/* Index into str of the shortest string; must be set before calling match(). */
int k;

/*
 * Return 1 if the window starting at `hay` equals str[k][start..end] read
 * either forwards or backwards, 0 otherwise. The caller guarantees that
 * `hay` has at least end - start + 1 characters.
 */
static int window_matches(const char *hay, int start, int end)
{
    int width = end - start + 1;
    int forward = 1;
    int backward = 1;

    for (int d = 0; d < width && (forward || backward); d++) {
        if (str[k][start + d] != hay[d]) {
            forward = 0;
        }
        if (str[k][end - d] != hay[d]) {
            backward = 0;
        }
    }
    return forward || backward;
}

/**
 * Check whether the substring str[k][start..end] of the shortest string
 * occurs, as written or reversed, in every other string.
 *
 * @param start index in str[k] of the first character of the candidate
 * @param end   index in str[k] of the last character of the candidate
 * @param n     number of strings stored in str
 * @return 1 if every string str[i] with i != k contains the candidate or
 *         its reverse, 0 otherwise
 */
int match(int start, int end, int n)
{
    int width = end - start + 1;

    for (int i = 0; i < n; i++) {
        if (i == k) {
            continue;   /* the candidate trivially occurs in its own string */
        }

        int len = (int)strlen(str[i]);
        int found = 0;

        /* Try every window of `width` characters in str[i]. */
        for (int j = 0; j + width <= len; j++) {
            if (window_matches(str[i] + j, start, end)) {
                found = 1;
                break;
            }
        }
        if (!found) {
            return 0;   /* one string without the candidate is enough to fail */
        }
    }
    return 1;
}
int main (void)
{
int T,i,j,n,len,minlen,flag;
scanf ("%d", &t);
while (t--)
{
minlen=1000,flag=0;
scanf ("%d", &n);
for (i=0;i<n;i++)
{
scanf ("%s", Str[i]);
Len = strlen (Str[i]);
if (Len<minlen)
{
Minlen=len; Save the length of the shortest string
K=i; To save the ordinal number of the shortest string
}
}
for (i=0;i<minlen;i++)///to match a sequential string of shortest string lookup
{
for (j=0;j<=i;j++)
{
if (Match (j,j+minlen-1-i,n))//SUBSTRING matches
{
flag=1;
Break
}
}
if (flag==1)
Break
}
printf ("%d\n", minlen-i);
}
System ("pause");
return 0;
}

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.