Calling the DLL of the Massive Intelligent Word Segmentation research edition to obtain word segmentation results (C#)

Source: Internet
Author: User

// Chinese word segmentation is the foundation of Chinese search engines. It is mainly used in information retrieval, information mining, translation between Chinese and other languages, Chinese proofreading, automatic clustering, automatic classification, and many other areas.

// This is the C# version, which I adapted from the VC (Visual C++) example.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;
namespace HLSSplit
{
    /// <summary>
    /// Runs the HL word segmentation DLL over a text and collects the results:
    /// the segmented words with their POS values, the weighted keywords, the
    /// semantic fingerprint bytes, and the time spent in the segmentation call.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the listing this class was recovered from had lost its
    /// identifier casing. The names of the project-defined types used here
    /// (<c>HL</c>, <c>SEG</c>, <c>POS</c>, <c>SegOption</c>) and of their members
    /// are reconstructed and must be confirmed against the interop wrapper,
    /// which is declared elsewhere.
    /// </remarks>
    public class HLParse
    {
        /// <summary>Result code: segmentation succeeded.</summary>
        private const byte ErrorNone = 0;

        /// <summary>Result code: the segmentation dictionary could not be initialised.</summary>
        private const byte ErrorInitFailed = 1;

        /// <summary>Result code: no handle could be created, or segmentation itself failed.</summary>
        private const byte ErrorSegmentFailed = 2;

        private Dictionary<string, float> m_strKeyWords;
        private Dictionary<string, POS> m_strWords;
        private TimeSpan ts;
        private byte iExtraCalcFlag = 0;
        private Dictionary<string, byte> m_strFinger;

        /// <summary>Keywords of the last parsed text, mapped to their weight.</summary>
        public Dictionary<string, float> KeyWords
        {
            get { return m_strKeyWords; }
        }

        /// <summary>Words of the last parsed text, mapped to their POS value.</summary>
        public Dictionary<string, POS> Words
        {
            get { return m_strWords; }
        }

        /// <summary>Time taken by the segmentation call of the last parse.</summary>
        public TimeSpan DoTime
        {
            get { return ts; }
        }

        /// <summary>
        /// Sets the extra-calculation flags (a bitwise OR of <c>SegOption</c>
        /// values cast to <c>byte</c>) used by <see cref="Parse"/>.
        /// </summary>
        public byte ExtraCalcFlag
        {
            set { iExtraCalcFlag = value; }
        }

        /// <summary>
        /// Semantic fingerprint of the last parsed text. Each fingerprint byte is
        /// keyed by its lowercase hexadecimal representation.
        /// </summary>
        public Dictionary<string, byte> Finger
        {
            get { return m_strFinger; }
        }

        /// <summary>
        /// Parses <paramref name="text"/> with every extra calculation enabled
        /// (POS, keyword, search and fingerprint). Overwrites the flags
        /// previously set through <see cref="ExtraCalcFlag"/>.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <returns>0 on success, 1 if initialisation failed, 2 if segmentation failed.</returns>
        public byte ParseAll(string text)
        {
            iExtraCalcFlag = AllOptions();
            return Parse(text);
        }

        /// <summary>
        /// Parses <paramref name="text"/> using the currently configured flags and
        /// stores the results in this instance.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <returns>0 on success, 1 if initialisation failed, 2 if segmentation failed.</returns>
        public byte Parse(string text)
        {
            return ParseWord(text, iExtraCalcFlag, out m_strKeyWords, out m_strWords, out m_strFinger, out ts);
        }

        /// <summary>
        /// Segments <paramref name="text"/> with the given flags. Keywords and the
        /// fingerprint are only collected when the corresponding option bit is set
        /// in <paramref name="iExtraCalcFlag"/>.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="iExtraCalcFlag">Bitwise OR of <c>SegOption</c> values.</param>
        /// <param name="m_strKeyWords">Receives keyword → weight; empty when not requested or on failure.</param>
        /// <param name="m_strWords">Receives word → POS value; empty on failure.</param>
        /// <param name="m_strFinger">Receives hex string → fingerprint byte; empty when not requested or on failure.</param>
        /// <param name="ts">Receives the duration of the segmentation call; zero if it was never reached.</param>
        /// <returns>0 on success, 1 if initialisation failed, 2 if segmentation failed.</returns>
        public static byte ParseWord(string text, byte iExtraCalcFlag,
            out Dictionary<string, float> m_strKeyWords,
            out Dictionary<string, POS> m_strWords,
            out Dictionary<string, byte> m_strFinger, out TimeSpan ts)
        {
            bool isOutKeyWord = HasOption(iExtraCalcFlag, (byte)SegOption.KEYWORD);
            bool isOutFinger = HasOption(iExtraCalcFlag, (byte)SegOption.FINGER);
            return ParseWord(text, iExtraCalcFlag, out m_strKeyWords, out m_strWords, out m_strFinger, out ts,
                isOutKeyWord, isOutFinger);
        }

        /// <summary>
        /// Segments <paramref name="text"/> with every extra calculation enabled
        /// and always collects keywords and the fingerprint.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="m_strKeyWords">Receives keyword → weight.</param>
        /// <param name="m_strWords">Receives word → POS value.</param>
        /// <param name="m_strFinger">Receives hex string → fingerprint byte.</param>
        /// <param name="ts">Receives the duration of the segmentation call.</param>
        /// <returns>0 on success, 1 if initialisation failed, 2 if segmentation failed.</returns>
        public static byte ParseWord(string text,
            out Dictionary<string, float> m_strKeyWords,
            out Dictionary<string, POS> m_strWords,
            out Dictionary<string, byte> m_strFinger, out TimeSpan ts)
        {
            return ParseWord(text, AllOptions(), out m_strKeyWords, out m_strWords, out m_strFinger, out ts,
                true, true);
        }

        /// <summary>Builds the flag byte with the POS, keyword, search and fingerprint options all set.</summary>
        private static byte AllOptions()
        {
            return (byte)((byte)SegOption.POS
                | (byte)SegOption.KEYWORD
                | (byte)SegOption.SEARCH
                | (byte)SegOption.FINGER);
        }

        /// <summary>Returns true when every bit of <paramref name="option"/> is set in <paramref name="flags"/>.</summary>
        private static bool HasOption(byte flags, byte option)
        {
            // Same truth table as the original "(flags | option) == flags" test.
            return (flags & option) == option;
        }

        /// <summary>
        /// Core implementation: initialises the dictionary, opens a handle, runs
        /// the segmentation and copies the results out of the native library.
        /// </summary>
        private static byte ParseWord(string text, byte iExtraCalcFlag,
            out Dictionary<string, float> m_strKeyWords,
            out Dictionary<string, POS> m_strWords,
            out Dictionary<string, byte> m_strFinger, out TimeSpan ts,
            bool isOutKeyWord, bool isOutFinger)
        {
            m_strFinger = new Dictionary<string, byte>();
            m_strKeyWords = new Dictionary<string, float>();
            m_strWords = new Dictionary<string, POS>();
            ts = TimeSpan.Zero;

            if (!HL.SplitInit())
            {
                return ErrorInitFailed;
            }

            // From here on the dictionary is loaded; "finally" guarantees it is
            // unloaded even if copying the results throws.
            try
            {
                IntPtr hHandle = HL.HLOpenSplit(); // create a word segmentation handle
                if (hHandle == IntPtr.Zero)
                {
                    return ErrorSegmentFailed;
                }

                try
                {
                    // Stopwatch is monotonic, unlike subtracting two DateTime.Now
                    // readings, which is affected by system clock adjustments.
                    Stopwatch timer = Stopwatch.StartNew();
                    bool bSuccess = HL.SplitWord(hHandle, text, iExtraCalcFlag);
                    timer.Stop();
                    ts = timer.Elapsed;

                    if (!bSuccess)
                    {
                        return ErrorSegmentFailed;
                    }

                    // Segmented words: a word that occurs several times keeps the
                    // POS value of its last occurrence.
                    int nResultCnt = HL.HLGetWordCnt(hHandle);
                    for (int i = 0; i < nResultCnt; i++)
                    {
                        SEG pWord = HL.GetWordAt(hHandle, i);
                        if (pWord.word == null)
                        {
                            continue; // a null key would make the dictionary throw
                        }
                        m_strWords[pWord.word] = pWord.POS;
                    }

                    if (isOutKeyWord)
                    {
                        int nKeyCnt = HL.HLGetFileKeyCnt(hHandle);
                        for (int j = 0; j < nKeyCnt; j++)
                        {
                            SEG pKey = HL.GetFileKeyAt(hHandle, j);
                            if (string.IsNullOrEmpty(pKey.word))
                            {
                                continue;
                            }
                            m_strKeyWords[pKey.word] = pKey.weight;
                        }
                    }

                    if (isOutFinger)
                    {
                        byte[] fs = HL.GetFingerM(hHandle); // obtain the semantic fingerprint
                        if (fs != null)
                        {
                            foreach (byte f in fs)
                            {
                                // Keyed by hex text, so bytes of equal value share one entry.
                                m_strFinger[string.Format("{0:x}", f)] = f;
                            }
                        }
                    }

                    return ErrorNone;
                }
                finally
                {
                    HL.HLCloseSplit(hHandle); // close the word segmentation handle
                }
            }
            finally
            {
                HL.HLFreeSplit(); // unload the word segmentation dictionary
            }
        }
    }
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.