using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
Namespace hlssplit
{
/// <summary>
/// C# P/Invoke bindings for the research edition of the word segmentation
/// library exposed by <c>hlssplit.dll</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the DLL name and every entry-point string in this class are
/// written in lower case. Confirm they match the symbol names actually exported
/// by the native library.
/// </remarks>
public class HL
{
    /// <summary>
    /// Mask applied to the part-of-speech field after a native record has been
    /// marshalled. It keeps the low 28 bits and clears the four highest bits.
    /// </summary>
    /// <remarks>
    /// The original note says the high bits must be blocked or an error occurs.
    /// NOTE(review): this mask also clears 0x10000000, 0x20000000 and 0x40000000,
    /// which the part-of-speech enum defines as members - confirm that is intended.
    /// </remarks>
    private const uint PosMask = 0x0fffffff;

    /// <summary>Initializes the word segmentation system.</summary>
    /// <param name="dict">
    /// Dictionary argument forwarded to the native call (presumably a dictionary
    /// path - confirm against the native documentation).
    /// </param>
    /// <returns>The boolean value returned by the native call.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlsplitinit", SetLastError = true, CharSet = CharSet.Unicode)]
    public static extern bool hlsplitinit(string dict);

    /// <summary>
    /// Initializes the word segmentation system, passing <c>null</c> as the
    /// dictionary argument of <see cref="hlsplitinit"/>.
    /// </summary>
    /// <returns>The boolean value returned by <see cref="hlsplitinit"/>.</returns>
    public static bool splitinit()
    {
        return hlsplitinit(null);
    }

    /// <summary>Unloads the word segmentation system.</summary>
    [DllImport("hlssplit.dll", EntryPoint = "hlfreesplit", SetLastError = true)]
    public static extern void hlfreesplit();

    /// <summary>Opens a word segmentation handle.</summary>
    /// <returns>The handle as an <see cref="IntPtr"/>.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlopensplit", SetLastError = true)]
    public static extern IntPtr hlopensplit();

    /// <summary>Closes a word segmentation handle.</summary>
    /// <param name="handle">The handle to close.</param>
    [DllImport("hlssplit.dll", EntryPoint = "hlclosesplit", SetLastError = true)]
    public static extern void hlclosesplit(IntPtr handle);

    /// <summary>Segments a string into words.</summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="lptext">Text to segment.</param>
    /// <param name="iextracalcflag">Word segmentation options.</param>
    /// <returns>The boolean value returned by the native call.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlsplitword", SetLastError = true)]
    public static extern bool hlsplitword(IntPtr handle, string lptext, int iextracalcflag);

    /// <summary>
    /// Segments a string into words with the option value 0.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="text">Text to segment.</param>
    /// <returns>The boolean value returned by <see cref="hlsplitword"/>.</returns>
    public static bool splitword(IntPtr handle, string text)
    {
        return hlsplitword(handle, text, 0);
    }

    /// <summary>
    /// Segments a string into words using a <see cref="segoption"/> value.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="text">Text to segment.</param>
    /// <param name="option">Option value; it is converted to <c>int</c> for the native call.</param>
    /// <returns>The boolean value returned by <see cref="hlsplitword"/>.</returns>
    public static bool splitword(IntPtr handle, string text, segoption option)
    {
        return hlsplitword(handle, text, (int)option);
    }

    /// <summary>
    /// Segments a string into words using a raw integer option value.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="text">Text to segment.</param>
    /// <param name="option">Option value passed to the native call unchanged.</param>
    /// <returns>The boolean value returned by <see cref="hlsplitword"/>.</returns>
    public static bool splitword(IntPtr handle, string text, int option)
    {
        return hlsplitword(handle, text, option);
    }

    /// <summary>Gets the number of word segmentation results.</summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <returns>The count returned by the native call.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlgetwordcnt", SetLastError = true)]
    public static extern int hlgetwordcnt(IntPtr handle);

    /// <summary>
    /// Gets a pointer to the specified word segmentation result. No entry point
    /// is named, so the method name is used as the export name.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="index">Index of the requested result.</param>
    /// <returns>Pointer to the native record.</returns>
    [DllImport("hlssplit.dll", SetLastError = true)]
    private static extern IntPtr hlgetwordat(IntPtr handle, int index);

    /// <summary>
    /// The interface intended for callers: gets the specified word segmentation
    /// result and converts the native pointer into a <see cref="seg"/> structure.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="index">Index of the requested result.</param>
    /// <returns>The marshalled record with its part-of-speech field masked.</returns>
    public static seg getwordat(IntPtr handle, int index)
    {
        return PtrToSeg(hlgetwordat(handle, index));
    }

    /// <summary>Loads the user-defined dictionary.</summary>
    /// <param name="dict">Dictionary argument forwarded to the native call.</param>
    /// <returns>The boolean value returned by the native call.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlopenusrdict", SetLastError = true)]
    public static extern bool hlopenusrdict(string dict);

    /// <summary>Unloads the user-defined dictionary.</summary>
    /// <returns>The boolean value returned by the native call.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlfreeusrdict", SetLastError = true)]
    public static extern bool hlfreeusrdict();

    /// <summary>Gets the number of keywords.</summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <returns>The count returned by the native call.</returns>
    [DllImport("hlssplit.dll", EntryPoint = "hlgetfilekeycnt", SetLastError = true)]
    public static extern int hlgetfilekeycnt(IntPtr handle);

    /// <summary>
    /// Gets a pointer to the specified keyword. No entry point is named, so the
    /// method name is used as the export name.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="index">Index of the requested keyword.</param>
    /// <returns>Pointer to the native record.</returns>
    [DllImport("hlssplit.dll", SetLastError = true)]
    private static extern IntPtr hlgetfilekeyat(IntPtr handle, int index);

    /// <summary>
    /// Gets the specified keyword and converts the native pointer into a
    /// <see cref="seg"/> structure.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="index">Index of the requested keyword.</param>
    /// <returns>The marshalled record with its part-of-speech field masked.</returns>
    public static seg getfilekeyat(IntPtr handle, int index)
    {
        return PtrToSeg(hlgetfilekeyat(handle, index));
    }

    /// <summary>
    /// Gets the semantic fingerprint as a native buffer. No entry point is
    /// named, so the method name is used as the export name.
    /// </summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <param name="rpData">Receives the pointer to the fingerprint bytes.</param>
    /// <param name="rdwLen">Receives the length of the fingerprint.</param>
    /// <returns>The boolean value returned by the native call.</returns>
    [DllImport("hlssplit.dll", SetLastError = true)]
    private static extern bool hlgetfingerm(IntPtr handle, out IntPtr rpData, out int rdwLen);

    /// <summary>Gets the semantic fingerprint as a managed byte array.</summary>
    /// <param name="handle">Word segmentation handle.</param>
    /// <returns>
    /// A copy of the fingerprint bytes, or <c>null</c> when the native call
    /// returns <c>false</c>, reports a non-positive length, or yields a null pointer.
    /// </returns>
    public static byte[] getfingerm(IntPtr handle)
    {
        IntPtr p;
        int len;
        bool ok = hlgetfingerm(handle, out p, out len);

        // Nothing to copy: keep returning null, as callers of the original expect.
        if (!ok || len <= 0 || p == IntPtr.Zero)
        {
            return null;
        }

        byte[] data = new byte[len];
        Marshal.Copy(p, data, 0, len);
        return data;
    }

    /// <summary>
    /// Converts a pointer returned by the native library into a <see cref="seg"/>
    /// value and applies <see cref="PosMask"/> to its part-of-speech field.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the pointer is not checked for null before marshalling,
    /// which matches the original code.
    /// </remarks>
    private static seg PtrToSeg(IntPtr pword)
    {
        seg word = (seg)Marshal.PtrToStructure(pword, typeof(seg));
        word.Pos = (pos)((uint)word.Pos & PosMask);
        return word;
    }
}
/// <summary>
/// One record produced by the native library: a word, its part-of-speech mark
/// and its keyword weight. Fields are laid out sequentially for marshalling.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct seg
{
    /// <summary>
    /// The word string.
    /// </summary>
    // An explicit BStr marshalling attribute was left disabled in the original:
    // [MarshalAs(UnmanagedType.BStr)]
    public string word;

    /// <summary>
    /// Part-of-speech mark, marshalled as an unsigned 4-byte integer.
    /// </summary>
    [MarshalAs(UnmanagedType.U4)]
    public pos Pos;

    /// <summary>
    /// Keyword weight. If the word is not a keyword, the weight is 0.
    /// </summary>
    public float weight;
}
/// <summary>
/// Word segmentation options. Each non-zero member is a distinct bit, so
/// members can be combined with the bitwise OR operator.
/// </summary>
[Flags]
public enum segoption : byte
{
    /// <summary>
    /// Default option: word segmentation only.
    /// </summary>
    Default = 0,

    /// <summary>
    /// Additionally compute keyword identification.
    /// </summary>
    Keyword = 0x1,

    /// <summary>
    /// Calculate the semantic fingerprint of the text.
    /// </summary>
    Finger = 0x2,

    /// <summary>
    /// Calculate the part-of-speech mark.
    /// </summary>
    Pos = 0x4,

    /// <summary>
    /// Output search-oriented word segmentation results.
    /// </summary>
    Search = 0x8
}
/// <summary>
/// Part-of-speech definitions. Every member is a distinct single-bit value.
/// </summary>
/// <remarks>
/// The per-member descriptions are carried over from the original comments,
/// whose wording is unclear in places; treat them as hints and confirm against
/// the native library's documentation. Where the original comment listed a
/// second value with the top bit set, it is noted as "alt".
/// </remarks>
[Flags]
public enum pos : uint
{
    D_a = 0x40000000, // adjective
    D_B = 0x20000000, // distinguishing morpheme
    D_c = 0x10000000, // conjunction morpheme
    D_d = 0x08000000, // adverb morpheme
    D_e = 0x04000000, // exclamation (original comment: "exclamation mark")
    D_f = 0x02000000, // location word (original comment unclear - confirm)
    D_I = 0x01000000, // idiom
    D_l = 0x00800000, // idiom (same description as D_I in the original - confirm)
    A_m = 0x00400000, // numeral morpheme
    D_mq = 0x00200000, // quantifier
    D_n = 0x00100000, // noun, noun morpheme
    D_o = 0x00080000, // described as "anthropomorphic word" in the original - confirm
    D_p = 0x00040000, // described as "prefix" in the original - NOTE(review): confirm
    A_q = 0x00020000, // measure word (alt: 0x80020000)
    D_r = 0x00010000, // pronoun
    D_S = 0x00008000, // place word
    D_t = 0x00004000, // time word
    D_u = 0x00002000, // auxiliary morpheme
    D_v = 0x00001000, // verb, verb morpheme
    D_w = 0x00000800, // punctuation (alt: 0x80000800)
    D_x = 0x00000400, // non-morpheme word
    D_y = 0x00000200, // modal morpheme
    D_z = 0x00000100, // status word
    A_nr = 0x00000080, // person name (alt: 0x80000080)
    A_ns = 0x00000040, // place name
    A_nt = 0x00000020, // organization or group
    A_nx = 0x00000010, // foreign character (alt: 0x80000010)
    A_nz = 0x00000008, // other special names
    D_h = 0x00000004, // front component
    D_k = 0x00000002 // rear component
}
}