Algorithms - Compiler Principles - Lexical Analyzer: State Machine (SM), Deterministic Finite Automaton (DFA), Non-deterministic Finite Automaton (NFA)

Source: Internet
Author: User
Algorithms - Compiler Principles - Lexical Analyzer: State Machine (SM), Deterministic Finite Automaton (DFA), Non-deterministic Finite Automaton (NFA) - Linux general technology - Linux programming and kernel information. See the following for details.
/*************************************** ********
Title: StateMachine-NFA-DFA.c
Author:
Time:
**************************************** ********/

/*************************************** **************************************** **
** $Id: acsmx.c,v 1.4 2003/03/05 16:42:11 chrisgreen Exp $
**
** Multi-Pattern Search Engine
**
** Aho-Corasick State Machine-uses a Deterministic Finite Automata-DFA
**
** Copyright (C) 2002 Sourcefire, Inc.
** Marc Norton
**
** Reference-Efficient String matching: An Aid to Bibliographic Search
** Alfred V Aho and Margaret J Corasick
** Bell Laboratories
** Copyright (C) 1975 Association for Computing Machinery, Inc
**
** Implemented from the 4 algorithms in the paper by Aho & Corasick
** And some implementation ideas from 'practical Algorithms in c'
**
** Notes:
** 1) This version uses about 1024 bytes per pattern character - heavy on the memory.
** 2) This algorithm finds all occurrences of all patterns within a
** body of text.
** 3) Support is included to handle upper and lower case matching.
** 4) Some compilers optimize the search routine well, others don't; this makes all the difference.
** 5) Aho inspects all bytes of the search text, but only once, so it's very efficient.
** If the patterns are all large, then the Modified Wu-Manber method is often faster.
** 6) I don't subscribe to any one method is best for all searching needs,
** The data decides which method is best,
** And we don't know until after the search method has been tested on the specific data sets.
**
** May 2002: Marc Norton 1st Version
** June 2002: Modified interface for SNORT, added case support
** Aug 2002: Cleaned up comments, and removed dead code.
** Nov 2,2002: Fixed queue_init (), added count = 0
**
**************************************** **************************************** ***/




#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>




/* Number of distinct input byte values; sizes the per-state transition array. */
#define ALPHABET_SIZE 256
/* Sentinel stored in a transition slot that has no target state.
 * Parenthesised so the negative literal cannot bind to a neighbouring operator. */
#define ACSM_FAIL_STATE (-1)


/*
 * One search pattern, kept as a node of a singly linked list.
 * The same layout is used for the per-state match lists, whose nodes are
 * byte-wise copies of pattern nodes.
 */
typedef struct _acsm_pattern {

    struct _acsm_pattern *next;  /* next node in the list, or NULL */
    unsigned char *patrn;        /* pattern bytes after upper-casing */
    unsigned char *casepatrn;    /* pattern bytes exactly as supplied */
    int n;                       /* pattern length in bytes */
    int nocase;                  /* non-zero: report matches regardless of case */
    int offset;                  /* caller-supplied; stored but not read by the code visible here */
    int depth;                   /* caller-supplied; stored but not read by the code visible here */
    unsigned id;                 /* caller-supplied id handed to the match callback */
    int iid;                     /* caller-supplied; stored but not read by the code visible here */
} ACSM_PATTERN;

/* One state of the automaton. */
typedef struct {

    /* Next state, indexed by input byte value */
    int NextState[ALPHABET_SIZE];

    /* Failure state - used while building the NFA and the DFA */
    int FailState;

    /* List of patterns that end at this state, or NULL if none */
    ACSM_PATTERN *MatchList;

} ACSM_STATETABLE;

/* ---- State machine struct: the handle passed to every acsm* function ---- */
typedef struct {

    int acsmMaxStates;               /* number of entries allocated in acsmStateTable */
    int acsmNumStates;               /* index of the highest state in use */

    ACSM_PATTERN *acsmPatterns;      /* linked list of all added patterns */
    ACSM_STATETABLE *acsmStateTable; /* state array, allocated by acsmCompile */

    int bcSize;                      /* not referenced by the code visible here */
    short bcShift[256];              /* not referenced by the code visible here */

} ACSM_STRUCT;




/* ============================== Function declarations ============================== */

/* Allocate a zeroed state machine handle. */
ACSM_STRUCT *acsmNew(void);

/* Add pattern `pat` of `n` bytes to the machine; call before acsmCompile. */
int acsmAddPattern(ACSM_STRUCT *p, unsigned char *pat, int n,
                   int nocase, int offset, int depth, unsigned id, int iid);

/* Build the state table from the patterns added so far. */
int acsmCompile(ACSM_STRUCT *acsm);

/* Scan `n` bytes at `T`, calling `Match` for each hit; returns the hit count. */
int acsmSearch(ACSM_STRUCT *acsm, unsigned char *T, int n,
               int (*Match)(unsigned id, int index, void *data),
               void *data);

/* Release memory held by the machine. */
void acsmFree(ACSM_STRUCT *acsm);




/*
 * Terminate the program with a diagnostic when allocation result `p` is NULL.
 * `s` is a string naming the caller, printed in the message.
 * Wrapped in do/while(0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else).
 * NOTE(review): exits with status 0, which reports success to the parent
 * process; kept unchanged for compatibility - confirm whether a failure
 * status is wanted.
 */
#define MEMASSERT(p, s) \
    do { \
        if (!(p)) { \
            fprintf(stderr, "ACSM-No Memory: %s!\n", (s)); \
            exit(0); \
        } \
    } while (0)

#ifdef DEBUG_AC
/* Running total of bytes successfully allocated through AC_MALLOC (debug builds only). */
static int max_memory = 0;
#endif

/* ---------------- MALLOC -------------------- */
/*
 * Allocate `n` bytes with malloc.
 * Returns the allocation, or NULL when malloc fails or `n` is negative
 * (a negative int would otherwise convert to a huge size_t).
 * When DEBUG_AC is defined, successful sizes are added to max_memory.
 */
static void *AC_MALLOC(int n)
{
    void *p;

    if (n < 0)
        return NULL;

    p = malloc((size_t)n);
#ifdef DEBUG_AC
    if (p)
        max_memory += n;
#endif
    return p;
}

/* ------------ FREE -------------- */
/* Release memory obtained from AC_MALLOC; a NULL pointer is accepted. */
static void AC_FREE(void *p)
{
    free(p); /* free(NULL) is a no-op, so no guard is needed */
}

/* ------ Simple queue node: one queued state number ------ */
typedef struct _qnode
{
    int state;            /* queued state number */
    struct _qnode *next;  /* next node towards the tail, or NULL */
}
QNODE;

/* ------------ Simple QUEUE Structure -------------*/
Typedef struct _ queue
{
QNODE * head, * tail;
Int count;
}
QUEUE;

/* ------------------ Queue init ------------------------ */
/* Reset `s` to the empty state. Does not free nodes already linked to it. */
static void queue_init(QUEUE *s)
{
    s->head = NULL;
    s->tail = NULL;
    s->count = 0;
}


/* --------------------- Add tail item to queue ---------------------- */
/*
 * Append `state` to the tail of `s`.
 * Terminates the program through MEMASSERT if the node cannot be allocated.
 */
static void queue_add(QUEUE *s, int state)
{
    QNODE *q = (QNODE *)AC_MALLOC(sizeof(QNODE));
    MEMASSERT(q, "queue_add");

    q->state = state;
    q->next = NULL;

    if (!s->head) {
        /* Empty queue: the new node is both head and tail */
        s->head = q;
    }
    else {
        s->tail->next = q;
    }
    s->tail = q;
    s->count++;
}


/* ----------- Remove head item from queue ------------- */
/*
 * Pop the head node of `s` and return its state number.
 * Returns 0 when the queue is empty, which is indistinguishable from a
 * queued state 0; callers check queue_count first.
 */
static int queue_remove(QUEUE *s)
{
    int state = 0;
    QNODE *q = s->head;

    if (q) {
        state = q->state;
        s->head = q->next;
        s->count--;
        if (!s->head) {
            /* Last node removed: bring tail and count back to the empty state */
            s->tail = NULL;
            s->count = 0;
        }
        AC_FREE(q);
    }

    return state;
}


/* ------------- queue_count ----------------- */
/* Return the number of items currently held in `s`. */
static int queue_count(QUEUE *s)
{
    return s->count;
}


/* ---------------------- queue_free -------------------------- */
/* Remove and free every node still in `s`, leaving it empty. */
static void queue_free(QUEUE *s)
{
    while (queue_count(s)) {
        queue_remove(s);
    }
}


/* ---- Case translation table: xlatcase[b] is the upper-case form of byte b ---- */
static unsigned char xlatcase[256];

/* --------- init_xlatcase ---------- */
/* Fill xlatcase using toupper() for every byte value 0..255. */
static void init_xlatcase(void)
{
    int i;
    for (i = 0; i < 256; i++) {
        xlatcase[i] = (unsigned char)toupper(i);
    }
}


/*-------------------------------------------*/
Static inline void ConvertCase (unsigned char * s, int m)
{
Int I;
For (I = 0; I <m; I ++ ){
S= Xlatcase [s];
}
}


/*---------------------------------------*/
Static inline void ConvertCaseEx (unsigned char * d, unsigned char * s, int m)
{
Int I;
For (I = 0; I <m; I ++ ){
D= Xlatcase [s];
}
}


/*------------------------------------------------*/
Static ACSM_PATTERN * CopyMatchListEntry (ACSM_PATTERN * px)
{
ACSM_PATTERN * p;
P = (ACSM_PATTERN *) AC_MALLOC (sizeof (ACSM_PATTERN ));
MEMASSERT (p, "CopyMatchListEntry ");
Memcpy (p, px, sizeof (ACSM_PATTERN ));
P-> next = 0;
Return p;
}


/*------------------------------------------------------------------------
 * Add a pattern to the list of patterns terminated at this state.
 * A byte-wise copy of `px` is inserted at the front of the state's
 * MatchList; the copy shares the pattern buffers with `px`.
 * Terminates through MEMASSERT on allocation failure.
 -------------------------------------------------------------------------*/
static void AddMatchListEntry(ACSM_STRUCT *acsm, int state, ACSM_PATTERN *px)
{
    ACSM_PATTERN *p = (ACSM_PATTERN *)AC_MALLOC(sizeof(ACSM_PATTERN));
    MEMASSERT(p, "AddMatchListEntry");
    memcpy(p, px, sizeof(ACSM_PATTERN));
    p->next = acsm->acsmStateTable[state].MatchList;
    acsm->acsmStateTable[state].MatchList = p;
}


/*------------------------------------------------------------------------
 * Add pattern states.
 * Walks the upper-cased pattern bytes from state 0, following transitions
 * that already exist, then creates one new state per remaining byte.
 * The pattern is recorded on the MatchList of the state reached last.
 -------------------------------------------------------------------------*/
static void AddPatternStates(ACSM_STRUCT *acsm, ACSM_PATTERN *p)
{
    unsigned char *pattern = p->patrn;
    int n = p->n;
    int state = 0;
    int next;

    /* Match up pattern with existing states */
    for (; n > 0; pattern++, n--) {
        next = acsm->acsmStateTable[state].NextState[*pattern];
        if (next == ACSM_FAIL_STATE)
            break;
        state = next;
    }

    /* Add new states for the rest of the pattern bytes, 1 state per byte */
    for (; n > 0; pattern++, n--) {
        acsm->acsmNumStates++;
        acsm->acsmStateTable[state].NextState[*pattern] = acsm->acsmNumStates;
        state = acsm->acsmNumStates;
    }

    AddMatchListEntry(acsm, state, p);
}


/* ----------- Build Non-Deterministic Finite Automata -------- */
/*
 * Compute FailState for every state reachable from state 0, visiting states
 * in breadth-first order, and extend each state's MatchList with copies of
 * the entries on its failure state's MatchList.
 * The failure-chain walk stops only at a state that has a transition for the
 * byte; acsmCompile gives state 0 a transition for every byte before calling
 * this function.
 */
static void Build_NFA(ACSM_STRUCT *acsm)
{
    int r, s;
    int i;
    QUEUE q, *queue = &q;
    ACSM_PATTERN *mlist = NULL;
    ACSM_PATTERN *px = NULL;

    /* Init a queue */
    queue_init(queue);

    /* Add the state 0 transitions 1st: their failure state is state 0 */
    for (i = 0; i < ALPHABET_SIZE; i++) {
        s = acsm->acsmStateTable[0].NextState[i];
        if (s) {
            queue_add(queue, s);
            acsm->acsmStateTable[s].FailState = 0;
        }
    }

    /* Build the fail state transitions for each valid state */
    while (queue_count(queue) > 0) {
        r = queue_remove(queue);

        /* Find final states for any failure */
        for (i = 0; i < ALPHABET_SIZE; i++) {
            int fs, next;

            s = acsm->acsmStateTable[r].NextState[i];
            if (s == ACSM_FAIL_STATE)
                continue;

            queue_add(queue, s);
            fs = acsm->acsmStateTable[r].FailState;

            /* Locate the next valid state for byte 'i', starting at r's failure state */
            while ((next = acsm->acsmStateTable[fs].NextState[i]) ==
                   ACSM_FAIL_STATE) {
                fs = acsm->acsmStateTable[fs].FailState;
            }

            /* Update 's' state failure state to point to the next valid state */
            acsm->acsmStateTable[s].FailState = next;

            /*
             * Copy 'next' state's MatchList to 's' state's MatchList.
             * We copy them so each list can be AC_FREE'd later,
             * else we could just manipulate pointers to fake the copy.
             */
            for (mlist = acsm->acsmStateTable[next].MatchList;
                 mlist != NULL; mlist = mlist->next) {
                px = CopyMatchListEntry(mlist);

                /* Never continue with a NULL copy: it is dereferenced just below */
                MEMASSERT(px, "Build_NFA");

                /* Insert at front of MatchList */
                px->next = acsm->acsmStateTable[s].MatchList;
                acsm->acsmStateTable[s].MatchList = px;
            }
        }
    }

    /* Clean up the queue */
    queue_free(queue);
}


/* ------------ Build Deterministic Finite Automata from NFA --------------- */
/*
 * Visit the states reachable from state 0 in breadth-first order and replace
 * every missing transition (ACSM_FAIL_STATE) of a visited state with the
 * transition its failure state has for the same byte.
 */
static void Convert_NFA_To_DFA(ACSM_STRUCT *acsm)
{
    int r, s;
    int i;
    QUEUE q, *queue = &q;

    /* Init a queue */
    queue_init(queue);

    /* Add the state 0 transitions 1st */
    for (i = 0; i < ALPHABET_SIZE; i++) {
        s = acsm->acsmStateTable[0].NextState[i];
        if (s) {
            queue_add(queue, s);
        }
    }

    /* Start building the next layer of transitions */
    while (queue_count(queue) > 0) {
        r = queue_remove(queue);

        /* State is a branch state */
        for (i = 0; i < ALPHABET_SIZE; i++) {
            s = acsm->acsmStateTable[r].NextState[i];
            if (s != ACSM_FAIL_STATE) {
                queue_add(queue, s);
            }
            else {
                /* No own transition: borrow the failure state's transition */
                acsm->acsmStateTable[r].NextState[i] =
                    acsm->acsmStateTable[acsm->acsmStateTable[r].FailState].
                    NextState[i];
            }
        }
    }

    /* Clean up the queue */
    queue_free(queue);
}


/*-----------------------------------------*/
/*
 * Create a new, empty state machine.
 * Also (re)initialises the shared case translation table.
 * Returns a zero-filled ACSM_STRUCT; terminates through MEMASSERT if the
 * allocation fails, so the result is never NULL.
 */
ACSM_STRUCT *acsmNew(void)
{
    ACSM_STRUCT *p;

    init_xlatcase();

    p = (ACSM_STRUCT *)AC_MALLOC(sizeof(ACSM_STRUCT));
    MEMASSERT(p, "acsmNew");
    memset(p, 0, sizeof(ACSM_STRUCT));
    return p;
}


/* ------- Add a pattern to the list of patterns for this state machine ------- */
/*
 * p       state machine the pattern is added to
 * pat     pattern bytes (need not be NUL-terminated); copied, not retained
 * n       number of bytes in pat
 * nocase  non-zero to report matches regardless of case
 * offset, depth, iid  stored on the pattern node unchanged
 * id      value handed to the match callback for this pattern
 *
 * Two private copies of the pattern are kept: an upper-cased one used to
 * build the automaton and an exact one used for case-sensitive confirmation.
 * The new node is inserted at the front of p->acsmPatterns.
 *
 * Returns 0 on success, -1 when n is negative. Terminates through MEMASSERT
 * if any allocation fails.
 */
int acsmAddPattern(ACSM_STRUCT *p, unsigned char *pat, int n, int nocase,
                   int offset, int depth, unsigned id, int iid)
{
    ACSM_PATTERN *plist;
    int nbytes;

    if (n < 0)
        return -1;

    /* malloc(0) may legitimately return NULL; always request at least one byte */
    nbytes = (n > 0) ? n : 1;

    plist = (ACSM_PATTERN *)AC_MALLOC(sizeof(ACSM_PATTERN));
    MEMASSERT(plist, "acsmAddPattern");

    plist->patrn = (unsigned char *)AC_MALLOC(nbytes);
    MEMASSERT(plist->patrn, "acsmAddPattern");
    ConvertCaseEx(plist->patrn, pat, n);

    plist->casepatrn = (unsigned char *)AC_MALLOC(nbytes);
    MEMASSERT(plist->casepatrn, "acsmAddPattern");
    if (n > 0)
        memcpy(plist->casepatrn, pat, (size_t)n);

    plist->n = n;
    plist->nocase = nocase;
    plist->offset = offset;
    plist->depth = depth;
    plist->id = id;
    plist->iid = iid;

    plist->next = p->acsmPatterns;
    p->acsmPatterns = plist;
    return 0;
}


/* ------------- Compile State Machine -------------- */
/*
 * Build the state table for all patterns added so far:
 *   1. size the table at one state per pattern byte plus state 0,
 *   2. mark every transition as ACSM_FAIL_STATE,
 *   3. add each pattern's states,
 *   4. make every missing transition of state 0 loop back to state 0,
 *   5. build the failure links, then fill in the remaining transitions.
 *
 * Returns 0 on success, -1 (leaving `acsm` untouched) when the table size
 * does not fit the int byte count accepted by AC_MALLOC. Terminates through
 * MEMASSERT if the table cannot be allocated.
 */
int acsmCompile(ACSM_STRUCT *acsm)
{
    int i, k;
    int max_states = 1;
    size_t table_bytes;
    ACSM_PATTERN *plist;

    /* Count number of states */
    for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next) {
        if (plist->n > INT_MAX - max_states)
            return -1;
        max_states += plist->n;
    }

    /* The byte count must survive the conversion to AC_MALLOC's int parameter */
    if ((size_t)max_states > (size_t)INT_MAX / sizeof(ACSM_STATETABLE))
        return -1;
    table_bytes = sizeof(ACSM_STATETABLE) * (size_t)max_states;

    acsm->acsmMaxStates = max_states;
    acsm->acsmStateTable = (ACSM_STATETABLE *)AC_MALLOC((int)table_bytes);
    MEMASSERT(acsm->acsmStateTable, "acsmCompile");
    memset(acsm->acsmStateTable, 0, table_bytes);

    /* Initialize state zero as a branch */
    acsm->acsmNumStates = 0;

    /* Initialize all states' NextStates to FAILED */
    for (k = 0; k < acsm->acsmMaxStates; k++) {
        for (i = 0; i < ALPHABET_SIZE; i++) {
            acsm->acsmStateTable[k].NextState[i] = ACSM_FAIL_STATE;
        }
    }

    /* Add each pattern to the state table */
    for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next) {
        AddPatternStates(acsm, plist);
    }

    /* Set all failed state transitions to return to the 0'th state */
    for (i = 0; i < ALPHABET_SIZE; i++) {
        if (acsm->acsmStateTable[0].NextState[i] == ACSM_FAIL_STATE) {
            acsm->acsmStateTable[0].NextState[i] = 0;
        }
    }

    /* Build the NFA */
    Build_NFA(acsm);

    /* Convert the NFA to a DFA */
    Convert_NFA_To_DFA(acsm);

    return 0;
}


/* Search text or binary data for pattern matches */
/*
 * acsm   compiled state machine (acsmCompile must have been called)
 * Tx     bytes to scan
 * n      number of bytes at Tx
 * Match  called for every reported match with the pattern id, the index in
 *        Tx where the match starts, and `data`; a non-zero return stops the
 *        search immediately
 * data   passed through to Match unchanged
 *
 * Each input byte is upper-cased through xlatcase as it is consumed, so no
 * intermediate copy of the text is needed and `n` is not limited by a fixed
 * buffer size. Patterns added without `nocase` are confirmed with a
 * byte-exact comparison against the original text before being reported.
 *
 * Returns the number of matches reported, including the one on which Match
 * asked to stop.
 */
int acsmSearch(ACSM_STRUCT *acsm, unsigned char *Tx, int n,
               int (*Match)(unsigned id, int index, void *data), void *data)
{
    ACSM_STATETABLE *StateTable = acsm->acsmStateTable;
    ACSM_PATTERN *mlist;
    unsigned char *T;
    unsigned char *Tend;
    int state = 0;
    int nfound = 0;
    int index;

    if (n <= 0)
        return 0;

    Tend = Tx + n;

    for (T = Tx; T < Tend; T++) {
        state = StateTable[state].NextState[xlatcase[*T]];

        for (mlist = StateTable[state].MatchList; mlist != NULL;
             mlist = mlist->next) {
            /* T is the last byte of the match; step back to its first byte */
            index = (int)(T - Tx) - mlist->n + 1;

            if (mlist->nocase) {
                nfound++;
                if (Match(mlist->id, index, data))
                    return nfound;
            }
            else if (memcmp(mlist->casepatrn, Tx + index, mlist->n) == 0) {
                nfound++;
                if (Match(mlist->id, index, data))
                    return nfound;
            }
        }
    }
    return nfound;
}


/* --------------- Free all memory ---------------- */
/*
 * Release everything the machine owns: every state's MatchList, the state
 * table, and the pattern list together with its pattern buffers.
 * MatchList nodes share their patrn/casepatrn buffers with the pattern list,
 * so those buffers are freed only once, through the pattern list.
 * The fields are reset afterwards, which makes a repeated call harmless.
 * The ACSM_STRUCT itself is NOT freed; a NULL `acsm` is ignored.
 */
void acsmFree(ACSM_STRUCT *acsm)
{
    int i;
    ACSM_PATTERN *node, *following;

    if (!acsm)
        return;

    if (acsm->acsmStateTable) {
        for (i = 0; i < acsm->acsmMaxStates; i++) {
            node = acsm->acsmStateTable[i].MatchList;
            while (node) {
                following = node->next;
                AC_FREE(node);
                node = following;
            }
            acsm->acsmStateTable[i].MatchList = NULL;
        }
        AC_FREE(acsm->acsmStateTable);
        acsm->acsmStateTable = NULL;
    }

    node = acsm->acsmPatterns;
    while (node) {
        following = node->next;
        AC_FREE(node->patrn);
        AC_FREE(node->casepatrn);
        AC_FREE(node);
        node = following;
    }
    acsm->acsmPatterns = NULL;

    acsm->acsmMaxStates = 0;
    acsm->acsmNumStates = 0;
}


// # Ifdef ACSMX_MAIN

/* ----------------------------------- A match is found ----------------------------------- */
/*
 * Match callback of the test driver: prints the matched pattern.
 * `id` is the index of the pattern in the argument vector and `data` is that
 * argument vector; the pattern text is looked up instead of squeezing a
 * pointer through the `unsigned` id, which would truncate it on platforms
 * where pointers are wider than unsigned.
 * Always returns 0 so the search continues.
 */
int MatchFound(unsigned id, int index, void *data)
{
    char **args = (char **)data;

    (void)index;
    fprintf(stdout, "%s\n", args[id]);
    return 0;
}


/* ============================== Test driver ============================== */
/*
 * argv[1] is the text to search and the remaining arguments are the patterns;
 * arguments starting with '-' are not patterns, and "-nocase" anywhere on the
 * command line makes all patterns case-insensitive.
 * The text is searched in place, so its length is not limited by a fixed
 * buffer.
 */
int main(int argc, char *argv[])
{
    int i, nocase = 0;
    ACSM_STRUCT *acsm;

    if (argc < 3) {
        fprintf(stderr, "Usage: acsmx pattern word-1 word-2 ... word-n -nocase\n");
        exit(0);
    }

    acsm = acsmNew();

    for (i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-nocase") == 0)
            nocase = 1;
    }

    for (i = 2; i < argc; i++) {
        if (argv[i][0] == '-')
            continue;
        /* The argv index doubles as the pattern id; see MatchFound */
        acsmAddPattern(acsm, (unsigned char *)argv[i], (int)strlen(argv[i]),
                       nocase, 0, 0, (unsigned)i, i - 2);
    }

    if (acsmCompile(acsm) != 0) {
        fprintf(stderr, "acsmx: could not compile the patterns\n");
        acsmFree(acsm);
        return 1;
    }

    acsmSearch(acsm, (unsigned char *)argv[1], (int)strlen(argv[1]),
               MatchFound, (void *)argv);
    acsmFree(acsm);
    printf("normal pgm end\n");
    return (0);
}
// # Endif /**/
Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.