// Chinese word segmentation is the foundation of Chinese search engines. It is mainly used in information retrieval, information mining, translation between Chinese and other languages, Chinese proofreading, automatic clustering, automatic classification, and many other areas.
// This is the C# version, which I adapted from the VC example.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
namespace hlssplit
{
    /// <summary>
    /// Runs the <c>hl</c> word segmenter over a piece of text and collects the
    /// segmented words, the extracted keywords and the fingerprint bytes.
    /// </summary>
    /// <remarks>
    /// Return codes used by every parse method in this class:
    /// 0 = success, 1 = <c>hl.splitinit</c> returned false,
    /// 2 = no handle could be opened or <c>hl.splitword</c> returned false.
    ///
    /// NOTE(review): the names <c>hl</c>, <c>segoption</c>, <c>SEG</c>, <c>POS</c> and
    /// their members are declared outside this file and are spelled here exactly as
    /// this file spells them. C# is case-sensitive, so confirm the casing against the
    /// real declarations. <c>hl.hlfreesplit</c> is inferred from the sibling names
    /// <c>hlopensplit</c> / <c>hlclosesplit</c> -- confirm.
    /// </remarks>
    public class hlparse
    {
        private Dictionary<string, float> m_strkeywords;
        private Dictionary<string, POS> m_strwords;
        private Dictionary<string, byte> m_strfinger;
        private TimeSpan ts;
        private byte iextracalcflag = 0;

        /// <summary>Keyword-to-weight map from the last <see cref="parse"/> call; null before the first call.</summary>
        public Dictionary<string, float> keywords
        {
            get { return m_strkeywords; }
        }

        /// <summary>Word-to-<c>POS</c> map from the last <see cref="parse"/> call; null before the first call.</summary>
        public Dictionary<string, POS> words
        {
            get { return m_strwords; }
        }

        /// <summary>Time spent inside the <c>hl.splitword</c> call of the last <see cref="parse"/>.</summary>
        public TimeSpan dotime
        {
            get { return ts; }
        }

        /// <summary>Write-only option flags that <see cref="parse"/> passes on to <c>hl.splitword</c>.</summary>
        public byte extracalcflag
        {
            set { iextracalcflag = value; }
        }

        /// <summary>Hex-string-to-byte map of the fingerprint bytes from the last <see cref="parse"/> call.</summary>
        public Dictionary<string, byte> finger
        {
            get { return m_strfinger; }
        }

        /// <summary>
        /// Enables all four options (Pos, Keyword, search, Finger) on this instance and
        /// then parses <paramref name="text"/>. The flags stay set afterwards.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <returns>0, 1 or 2; see the class remarks.</returns>
        public byte parseall(string text)
        {
            iextracalcflag = allflags();
            return parse(text);
        }

        /// <summary>
        /// Parses <paramref name="text"/> with the currently configured flags and stores
        /// the results in this instance's properties.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <returns>0, 1 or 2; see the class remarks.</returns>
        public byte parse(string text)
        {
            return parseword(text, iextracalcflag, out m_strkeywords, out m_strwords, out m_strfinger, out ts);
        }

        /// <summary>
        /// Parses <paramref name="text"/> with caller-supplied flags. Keywords and the
        /// fingerprint are only collected when their bits are set in
        /// <paramref name="iextracalcflag"/>.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="iextracalcflag">Bitwise OR of <c>segoption</c> values.</param>
        /// <param name="m_strkeywords">Receives keyword-to-weight pairs (empty when not requested).</param>
        /// <param name="m_strwords">Receives word-to-<c>POS</c> pairs.</param>
        /// <param name="m_strfinger">Receives hex-string-to-byte pairs (empty when not requested).</param>
        /// <param name="ts">Receives the time spent in <c>hl.splitword</c>.</param>
        /// <returns>0, 1 or 2; see the class remarks.</returns>
        public static byte parseword(string text, byte iextracalcflag,
            out Dictionary<string, float> m_strkeywords,
            out Dictionary<string, POS> m_strwords,
            out Dictionary<string, byte> m_strfinger,
            out TimeSpan ts)
        {
            bool isoutkeyword = hasflag(iextracalcflag, (byte)segoption.Keyword);
            bool isoutfinger = hasflag(iextracalcflag, (byte)segoption.Finger);
            return parseword(text, iextracalcflag, out m_strkeywords, out m_strwords, out m_strfinger, out ts,
                isoutkeyword, isoutfinger);
        }

        /// <summary>
        /// Parses <paramref name="text"/> with all four options enabled and always
        /// collects keywords and the fingerprint.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="m_strkeywords">Receives keyword-to-weight pairs.</param>
        /// <param name="m_strwords">Receives word-to-<c>POS</c> pairs.</param>
        /// <param name="m_strfinger">Receives hex-string-to-byte pairs.</param>
        /// <param name="ts">Receives the time spent in <c>hl.splitword</c>.</param>
        /// <returns>0, 1 or 2; see the class remarks.</returns>
        public static byte parseword(string text,
            out Dictionary<string, float> m_strkeywords,
            out Dictionary<string, POS> m_strwords,
            out Dictionary<string, byte> m_strfinger,
            out TimeSpan ts)
        {
            return parseword(text, allflags(), out m_strkeywords, out m_strwords, out m_strfinger, out ts,
                true, true);
        }

        /// <summary>Builds the flag byte with the Pos, Keyword, search and Finger options all set.</summary>
        private static byte allflags()
        {
            byte flags = 0;
            flags |= (byte)segoption.Pos;
            flags |= (byte)segoption.Keyword;
            flags |= (byte)segoption.search;
            flags |= (byte)segoption.Finger;
            return flags;
        }

        /// <summary>Returns true when every bit of <paramref name="bit"/> is set in <paramref name="flags"/>.</summary>
        private static bool hasflag(byte flags, byte bit)
        {
            return (flags & bit) == bit;
        }

        /// <summary>
        /// Shared implementation: initialises the segmenter, opens a handle, segments
        /// <paramref name="text"/> and copies the results into fresh dictionaries.
        /// The out dictionaries are always non-null on return, even on failure.
        /// </summary>
        private static byte parseword(string text, byte iextracalcflag,
            out Dictionary<string, float> m_strkeywords,
            out Dictionary<string, POS> m_strwords,
            out Dictionary<string, byte> m_strfinger,
            out TimeSpan ts, bool isoutkeyword, bool isoutfinger)
        {
            m_strfinger = new Dictionary<string, byte>();
            m_strkeywords = new Dictionary<string, float>();
            m_strwords = new Dictionary<string, POS>();
            ts = TimeSpan.Zero;

            if (!hl.splitinit())
            {
                // Nothing was loaded, so there is nothing to release.
                return 1;
            }

            IntPtr hhandle = IntPtr.Zero;
            try
            {
                hhandle = hl.hlopensplit(); // create a word segmentation handle
                if (hhandle == IntPtr.Zero)
                {
                    return 2;
                }

                // Stopwatch instead of DateTime.Now: finer resolution and immune to
                // wall-clock adjustments.
                System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
                bool bsuccess = hl.splitword(hhandle, text, iextracalcflag);
                timer.Stop();
                ts = timer.Elapsed;

                if (!bsuccess)
                {
                    return 2;
                }

                // Segmented words; a repeated word keeps the POS of its last occurrence.
                int nresultcnt = hl.hlgetwordcnt(hhandle);
                for (int i = 0; i < nresultcnt; i++)
                {
                    SEG pword = hl.getwordat(hhandle, i);
                    if (pword.Word == null)
                    {
                        continue; // a null key would make the dictionary throw
                    }
                    m_strwords[pword.Word] = pword.Pos;
                }

                if (isoutkeyword)
                {
                    int nkeycnt = hl.hlgetfilekeycnt(hhandle);
                    for (int j = 0; j < nkeycnt; j++)
                    {
                        SEG pkey = hl.getfilekeyat(hhandle, j);
                        if (string.IsNullOrEmpty(pkey.Word))
                        {
                            continue;
                        }
                        m_strkeywords[pkey.Word] = pkey.weight;
                    }
                }

                if (isoutfinger)
                {
                    byte[] fs = hl.getfingerm(hhandle);
                    if (fs != null)
                    {
                        foreach (byte f in fs)
                        {
                            // "x" is the standard lower-case hexadecimal specifier; it must
                            // follow the colon directly. Equal bytes share one entry.
                            string stru = string.Format("{0:x}", f);
                            m_strfinger[stru] = f;
                        }
                    }
                }

                return 0;
            }
            finally
            {
                // Runs on every path after a successful splitinit, including exceptions:
                // close the handle (if one was opened), then unload the dictionary.
                if (hhandle != IntPtr.Zero)
                {
                    hl.hlclosesplit(hhandle);
                }
                hl.hlfreesplit();
            }
        }
    }
}