Various algorithms for string matching (single-pattern matching)

Source: Internet
Author: User
#include <iostream>
#include <string>
#include <cstring>
#include <cstdio>
using namespace std;

const int N = 100000;  // capacity of the text buffer (including the terminating '\0')
const int M = 100;     // capacity of the pattern buffer (including the terminating '\0')
int n;                 // actual length of the text; main() sets it after every read
int m;                 // actual length of the pattern; main() sets it after every read
char T[N];             // the text string
char P[M];             // the pattern string
int pre[N];            // KMP prefix function, 1-based: pre[q] belongs to the prefix P[0..q-1]

/******************************************************************************
 * String matching - simple (brute-force) matching algorithm:
 * if the substring of T that starts at index s (0 <= s < n-m+1) is equal to
 * P, the match succeeds and s is returned. Otherwise -1 is returned.
 *
 * Reads the global lengths n and m, so the caller must have set them.
 * An empty pattern matches at index 0.
 ******************************************************************************/
int Index_BF()
{
    for (int s = 0; s + m <= n; ++s) {
        int j = 0;
        while (j < m && T[s + j] == P[j]) {
            ++j;  // one more character matched, compare the next one
        }
        if (j == m) {
            return s;  // the whole pattern matched at shift s
        }
        // mismatch: slide the pattern one position and restart from its head
    }
    return -1;  // P does not occur in T
}

/******************************************************************************
 * String matching - Sunday algorithm:
 * the pattern does not have to be compared strictly left-to-right or
 * right-to-left. When a mismatch is found the algorithm skips as many
 * characters as possible before the next attempt, which improves efficiency.
 * The idea is similar to the BM algorithm: on a mismatch, look at the text
 * character immediately after the current window.
 *   - If that character does not occur in the pattern, skip past it:
 *     shift = pattern length + 1.
 *   - Otherwise, shift = distance from the rightmost occurrence of that
 *     character in the pattern to the end of the pattern, plus 1.
 *
 * Returns the index of the first match in T, or -1 when there is none.
 ******************************************************************************/
int Sunday()
{
    const int n = static_cast<int>(strlen(T));  // length of the text
    const int m = static_cast<int>(strlen(P));  // length of the pattern

    // next[c] = shift to apply when c is the character just after the window.
    // Default: c is not in the pattern, so jump over it entirely (m + 1).
    int next[256];
    for (int c = 0; c < 256; ++c) {
        next[c] = m + 1;
    }
    // Later (more rightward) occurrences overwrite earlier ones.
    // Example: P = "abcedfb" gives a=7 b=1 c=5 d=3 e=4 f=2, everything else 8.
    // Index through unsigned char so bytes >= 0x80 cannot produce a negative index.
    for (int j = 0; j < m; ++j) {
        next[static_cast<unsigned char>(P[j])] = m - j;
    }

    int pos = 0;
    while (pos + m <= n) {  // the window must end inside the text
        int j = 0;
        while (j < m && T[pos + j] == P[j]) {
            ++j;
        }
        if (j == m) {
            return pos;
        }
        // pos + m <= n here, so T[pos + m] is at worst the terminating '\0'.
        pos += next[static_cast<unsigned char>(T[pos + m])];
    }
    return -1;
}

/******************************************************************************
 * String matching - ZZL algorithm:
 * first find every occurrence of the pattern's first character in the text
 * and store those positions; then take the stored positions one by one and
 * try to match the rest of the pattern from each of them. For a text and a
 * pattern that are matched frequently this is fast, because the candidate
 * positions are saved in advance.
 ******************************************************************************/
int k;     // number of times the first character of P occurs in T (candidate count)
int v;     // number of times the whole pattern P occurs in T
int x[N];  // x[0..k-1]: text positions where the first character of P occurs
int s[N];  // s[0..v-1]: for every full match, the index i of its position x[i]

// Fills k, x, v and s as described above and returns v.
// An empty pattern yields no matches.
int zzl()
{
    const int n = static_cast<int>(strlen(T));  // length of the text
    const int m = static_cast<int>(strlen(P));  // length of the pattern

    k = 0;
    v = 0;
    if (m == 0) {
        return 0;
    }

    // Phase 1: collect every position where the first pattern character occurs.
    for (int i = 0; i + m <= n; ++i) {
        if (T[i] == P[0]) {
            x[k++] = i;
        }
    }

    // Phase 2: verify the remaining m-1 characters at every candidate.
    for (int i = 0; i < k; ++i) {
        int j = 1;
        while (j < m && T[x[i] + j] == P[j]) {
            ++j;
        }
        if (j == m) {
            s[v++] = i;
        }
    }
    return v;
}

/******************************************************************************
 * String matching - RK (Rabin-Karp) algorithm:
 * hash the pattern and every length-m window of the text to a number, compare
 * the numbers first, and only when they agree compare the actual characters
 * with the simple algorithm.
 ******************************************************************************/

// Simple comparison used by Rabin_Karp: true when P occurs in T at shift s.
bool NativeStringMatcher(const char *T, int s, const char *P)
{
    const int m = static_cast<int>(strlen(P));
    for (int j = 0; j < m; ++j) {
        if (T[s + j] != P[j]) {
            return false;
        }
    }
    return true;
}

// Reduces value modulo q into the range [0, q). Requires q > 0.
static int mod_nonneg(long long value, int q)
{
    const long long r = value % q;
    return static_cast<int>(r < 0 ? r + q : r);
}

// Rabin-Karp search of P in T using radix d and modulus q.
// Prints the hash of every window, every verified match and every spurious
// hash hit. Does nothing when the pattern is empty, longer than the text,
// or when q is not positive.
void Rabin_Karp(int d, int q)
{
    const int n = static_cast<int>(strlen(T));
    const int m = static_cast<int>(strlen(P));
    if (m == 0 || m > n || q <= 0) {
        return;
    }

    // h = d^(m-1) mod q, built by repeated multiplication so it never overflows.
    int h = 1 % q;
    for (int i = 0; i < m - 1; ++i) {
        h = mod_nonneg(1LL * h * d, q);
    }

    // Preprocessing: hash of the pattern (p) and of the first window (t).
    // P[i] - '0' maps a character to its numeric value; characters below '0'
    // give negative values, so both hashes are normalized into [0, q) to stay
    // comparable with each other.
    int p = 0;
    int t = 0;
    for (int i = 0; i < m; ++i) {
        p = mod_nonneg(1LL * d * p + (P[i] - '0'), q);
        t = mod_nonneg(1LL * d * t + (T[i] - '0'), q);
    }

    for (int i = 0; i + m <= n; ++i) {
        printf("t%d=%d\n", i, t);
        if (p == t) {
            if (NativeStringMatcher(T, i, P)) {
                printf("matched position: %d\n", i);
            } else {
                printf("pseudo hit point: %d\n", i);
            }
        }
        if (i + m < n) {
            // Roll the hash: drop T[i], append T[i + m].
            t = mod_nonneg(1LL * d * (t - 1LL * h * (T[i] - '0')) + (T[i + m] - '0'), q);
        }
    }
}

/******************************************************************************
 * String matching - KMP algorithm:
 * on a mismatch the text pointer never moves back. Instead, the "partial
 * match" obtained so far is used to shift the pattern as far right as
 * possible before the comparison continues. Note that the pattern may shift
 * by more than one character at a time, and that after the shift the
 * comparison may resume somewhere after the start of the pattern rather
 * than at its first character.
 ******************************************************************************/

// Computes the prefix function of the pattern P into pre[1..m] and prints it.
// pre[q] is the length of the longest proper prefix of P[0..q-1] that is also
// a suffix of it. Reads the global pattern length m.
void compute_preflx()
{
    int border = 0;  // length of the longest border found so far
    pre[1] = 0;      // a single character has no proper border
    for (int q = 2; q <= m; ++q) {
        while (border > 0 && P[border] != P[q - 1]) {
            border = pre[border];  // fall back to the next shorter border
        }
        if (P[border] == P[q - 1]) {
            ++border;
        }
        pre[q] = border;
    }
    for (int i = 1; i <= m; ++i) {
        cout << pre[i] << " ";
    }
    cout << '\n';
}

// KMP search of P in T; prints every valid shift s.
// Reads the global lengths n and m, so the caller must have set them.
// An empty pattern is not searched for.
void kmp()
{
    compute_preflx();
    if (m == 0) {
        return;
    }

    int q = 0;  // number of pattern characters matched; also the next index into P
    for (int i = 0; i < n; ++i) {  // scan the text left to right, i never moves back
        while (q > 0 && P[q] != T[i]) {
            q = pre[q];  // next pattern character does not match: shorten the match
        }
        if (P[q] == T[i]) {
            ++q;  // next pattern character matches the text character
        }
        if (q == m) {  // the whole pattern matched, ending at text index i
            cout << "s=" << i - m + 1 << '\n';
            q = pre[q];  // continue looking for the next (possibly overlapping) match
        }
    }
}

/******************************************************************************
 * String matching - Horspool algorithm:
 * for every search window, compare the last character of the window (call it
 * beta) with the last character of the pattern. If they are equal, verify the
 * remaining characters from back to front until everything matches or some
 * character differs. Then, whether or not the window matched, move the window
 * right according to the next occurrence of beta in the pattern.
 ******************************************************************************/

// Horspool search of P in T; prints every occurrence.
// Does nothing when the pattern is empty or longer than the text.
void HorspoolMatch()
{
    const int n = static_cast<int>(strlen(T));  // length of the text
    const int m = static_cast<int>(strlen(P));  // length of the pattern
    if (m == 0 || m > n) {
        // An empty pattern would give a zero shift (endless loop) and read T[-1].
        return;
    }

    // skip[c] = distance from the rightmost occurrence of c in P[0..m-2] to the
    // end of the pattern; m when c does not occur there.
    int skip[256];
    for (int c = 0; c < 256; ++c) {
        skip[c] = m;
    }
    for (int i = 0; i < m - 1; ++i) {
        skip[static_cast<unsigned char>(P[i])] = m - i - 1;
    }

    int pos = 0;
    while (pos <= n - m) {
        int j = m - 1;
        while (j >= 0 && T[pos + j] == P[j]) {  // verify from back to front
            --j;
        }
        if (j < 0) {
            cout << "an occurrence at:" << pos << '\n';
        }
        // Shift by the table entry of the window's last character.
        pos += skip[static_cast<unsigned char>(T[pos + m - 1])];
    }
}

// Reads one line from stdin into buf (capacity cap), without the trailing
// newline. Over-long lines are truncated and their remainder is discarded.
// Returns false on end of input.
static bool read_line(char *buf, int cap)
{
    if (fgets(buf, cap, stdin) == NULL) {
        return false;
    }
    char *eol = strchr(buf, '\n');
    if (eol != NULL) {
        *eol = '\0';
    } else {
        int c;
        while ((c = getchar()) != EOF && c != '\n') {
            // drop the part of the line that did not fit into buf
        }
    }
    return true;
}

// Reads text/pattern line pairs until end of input and runs every matcher.
int main()
{
    while (read_line(T, N)) {
        if (!read_line(P, M)) {
            break;  // a text line without a pattern line: nothing to search for
        }
        n = static_cast<int>(strlen(T));
        m = static_cast<int>(strlen(P));

        kmp();
        HorspoolMatch();
        Rabin_Karp(10, 13);
        zzl();
        Sunday();
    }
    return 0;
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send us an email and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.