Boyer–Moore (BM) pattern-matching algorithm: an implementation in C

Source: Internet
Author: User

Address: http://ouyangjia7.javaeye.com/blog/353137

#include "messageformat.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>

using namespace std;

/* Int processfile ();
{

}*/

/*
 * Function:  int *makeskip(char *ptrn, int plen)
 * Purpose:   Preprocess the pattern into the bad-character table consulted
 *            by bmsearch() after a mismatch.
 * Parameters:
 *   ptrn => pattern string P (only the first plen bytes are read)
 *   plen => length of P in bytes
 * Return value:
 *   A malloc()ed array of 256 ints indexed by byte value; the caller owns it
 *   and must free() it.  Entry c is the amount to add to the text index that
 *   sits on the mismatching text byte c so that the index ends up one past
 *   the end of the next window to try:
 *     - c occurs in P, last at index i  ->  plen - i
 *     - c does not occur in P           ->  plen + 1
 *   NULL is returned when plen is negative, when ptrn is NULL with a
 *   non-zero plen, or when the allocation fails.
 */
int *makeskip(char *ptrn, int plen)
{
    /* One slot for every value an 8-bit byte can take. */
    enum { ALPHABET_SIZE = 256 };
    int *skip;
    int i;

    if (plen < 0 || (ptrn == NULL && plen > 0))
    {
        return NULL;
    }

    skip = (int *)malloc(ALPHABET_SIZE * sizeof *skip);
    if (skip == NULL)
    {
        fprintf(stderr, "malloc failed! ");
        return NULL;
    }

    /*
     * A byte that never occurs in the pattern rules out every window that
     * covers it, so the next window starts right after it: plen + 1.
     * (A default of plen would leave a one-byte pattern's window where it
     * is and the search would never advance.)
     */
    for (i = 0; i < ALPHABET_SIZE; i++)
    {
        skip[i] = plen + 1;
    }

    /* Left-to-right pass: later (right-most) occurrences overwrite earlier ones. */
    for (i = 0; i < plen; i++)
    {
        skip[(unsigned char)ptrn[i]] = plen - i;
    }

    return skip;
}

/*
 * Function:  int *makeshift(char *ptrn, int plen)
 * Purpose:   Preprocess the pattern into the good-suffix table consulted by
 *            bmsearch() after a mismatch.
 * Parameters:
 *   ptrn => pattern string P (only the first plen bytes are read)
 *   plen => length of P in bytes
 * Return value:
 *   A malloc()ed array of plen ints; the caller owns it and must free() it.
 *   Entry j applies when P[j+1 .. plen-1] matched the text and P[j] did not.
 *   It holds the amount to add to the text index (which sits on the
 *   mismatching byte) so that the index ends up one past the end of the next
 *   window to try:
 *
 *       shift[j] = (plen - j) + d
 *
 *   where (plen - j) moves the index back to one past the end of the current
 *   window and d is the distance the pattern itself slides to the right.
 *   d is the smallest value >= 1 for which every byte of the matched suffix
 *   is either equal to the pattern byte d positions to its left or has no
 *   such byte because it falls off the left edge of the pattern.  The byte
 *   preceding the re-occurrence is not required to differ from P[j].
 *   With nothing matched (j == plen - 1) the pattern slides by one, so the
 *   entry is 2.
 *   NULL is returned when ptrn is NULL, plen is not positive, or the
 *   allocation fails.
 */
int *makeshift(char *ptrn, int plen)
{
    int *shift;
    int j;

    /* A zero-length table has no last entry to seed, so reject it up front. */
    if (ptrn == NULL || plen <= 0)
    {
        return NULL;
    }

    shift = (int *)malloc((size_t)plen * sizeof *shift);
    if (shift == NULL)
    {
        fprintf(stderr, "malloc failed! ");
        return NULL;
    }

    /* Mismatch on the very last pattern byte: slide by 1, index moves by 2. */
    shift[plen - 1] = 2;

    for (j = plen - 2; j >= 0; j--)
    {
        int d;

        /* Try slides from the shortest up; d == plen always fits. */
        for (d = 1; d < plen; d++)
        {
            int k = plen - 1;

            /* Walk the matched suffix right to left while it lines up. */
            while (k > j && (k - d < 0 || ptrn[k - d] == ptrn[k]))
            {
                k--;
            }

            if (k == j)
            {
                break;
            }
        }

        shift[j] = (plen - j) + d;
    }

    return shift;
}

/*
Function: int * bmsearch (char *, Int, char *, Int, int *, int *)
Objective: To determine whether the text string t contains the pattern string P
Parameters:
Buf => text string t
Blen => text string T Length
Ptrn => mode string P
Plen => pattern string P Length
Skip => bad orders table
Shift => suffix table
Return Value:
INT-1 indicates success (text string contains mode string), 0 indicates failure (text string does not contain mode string ).
*/
Bool bmsearch (char * ptrn, char * BUF)
{
Int Plen = strlen (ptrn );
Int B _idx = Plen;
Int blen = strlen (BUF );
 

If (Plen = 0)
Return false;
 
Int * Skip = makeskip (ptrn, Plen );
Int * shift = makeshift (ptrn, Plen );

While (B _idx <= blen) // calculates whether the string matches to the end
{
Int p_idx = Plen, skip_stride, shift_stride;
While (BUF [-- B _idx] = ptrn [-- p_idx]) // start matching
{
If (B _idx <0)
Return false;
If (p_idx = 0)
{
Return true;
}
}
Skip_stride = skip [(unsigned char) BUF [B _idx]; // calculate the Skip distance based on bad character rules
Shift_stride = shift [p_idx]; // calculate the Hop Distance Based on the suffix rules.
B _idx + = (skip_stride> shift_stride )? Skip_stride: shift_stride; // whichever is greater
}
Return false;
}
Int main ()
{
Char * PTR = "465789 ";
Char * Buf = "789456465789 ";

If (bmsearch (PTR, Buf)
{< br> cout <"find OK" }< br>
}

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If anything on this page is confusing, please send us an email; we will handle the problem within 5 days of receiving your message.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.