Maximum matching algorithm: an entry-level word segmentation algorithm. The figure above is a flow chart of the algorithm. Many variants of this algorithm can be found on the Internet.
Important Notes:
1. When reading Chinese characters, pay attention to the encoding you specify when constructing the StreamReader:
// Open the word library with an explicit GB2312 encoding so that the Chinese
// characters decode correctly. The encoding name must not contain stray
// whitespace, otherwise GetEncoding rejects it.
// NOTE(review): on .NET Core / .NET 5+ GB2312 may require registering
// System.Text.CodePagesEncodingProvider first - confirm for your target runtime.
// Dispose the reader (for example with a using block) once the words are loaded.
StreamReader sr =
    new StreamReader(wordlibpath,
        System.Text.Encoding.GetEncoding("gb2312"));
2. The algorithm is very simple, but understanding it is much quicker than writing it out cleanly. The code took me 65 minutes to type in, and I'm dizzy.
The result is shown below. It is only the simplest version; of course it needs to be developed further, using other, better methods. To be continued...
/// <summary>
/// Returns a copy of <paramref name="strinput"/> with every space, newline
/// ('\n') and tab ('\t') character removed. All other characters are kept in
/// their original order.
/// </summary>
/// <param name="strinput">Text to filter; null is treated as an empty string.</param>
/// <returns>The filtered text (never null).</returns>
private string skipblanks(string strinput)
{
    if (string.IsNullOrEmpty(strinput))
        return "";

    // A StringBuilder avoids allocating a new string for every kept character.
    System.Text.StringBuilder kept = new System.Text.StringBuilder(strinput.Length);
    foreach (char c in strinput)
    {
        if (c == ' ' || c == '\n' || c == '\t')
            continue;
        kept.Append(c);
    }
    return kept.ToString();
}
/// <summary>
/// Segments text with the backward (right-to-left) maximum matching algorithm.
/// Blanks are first removed with skipblanks. Then, starting from the end of the
/// text, the longest suffix of at most maxlen characters that wordlib contains
/// is taken as a word. If no suffix matches, the last character is skipped.
/// This repeats on the remaining prefix until the text is consumed.
/// </summary>
/// <param name="strinput">Text to segment; null is treated as an empty string.</param>
/// <returns>
/// The blank-free text with a '/' inserted before every position that is the
/// start or the end of a matched word. No '/' is written after the final
/// character.
/// </returns>
private string maxbackfowardpatch(string strinput)
{
    if (strinput == null)
        return "";

    strinput = skipblanks(strinput);
    char[] strarray = strinput.ToCharArray();
    int wordlen = strarray.Length;

    // Segment points: start index and exclusive end index of every matched word.
    HashSet<int> segpos = new HashSet<int>();

    /* Maximum backward matching algorithm */
    // rightidx is the exclusive end of the prefix that is still unsegmented.
    int rightidx = wordlen;
    while (rightidx > 0)
    {
        bool matched = false;

        // Try the longest candidate first so that the maximum match wins;
        // the candidate can be neither longer than maxlen nor than the prefix.
        int longest = System.Math.Min(maxlen, rightidx);
        for (int sublen = longest; sublen > 0; sublen--)
        {
            int leftidx = rightidx - sublen;
            string strword = strinput.Substring(leftidx, sublen);
            if (wordlib.Contains(strword))
            {
                segpos.Add(leftidx);
                segpos.Add(rightidx);
                rightidx = leftidx; // continue with the text left of this word
                matched = true;
                break;
            }
        }

        // No dictionary word ends here: drop the last character and go on.
        if (!matched)
            rightidx--;
    }

    // Output: copy the characters, writing '/' in front of every segment point.
    System.Text.StringBuilder stroutput = new System.Text.StringBuilder(wordlen + segpos.Count);
    for (int i = 0; i < wordlen; i++)
    {
        if (segpos.Contains(i))
            stroutput.Append('/');
        stroutput.Append(strarray[i]);
    }
    return stroutput.ToString();
}
That is the main code; there is nothing else to it.