package ICTCLAS.I3S.Test;

import java.io.UnsupportedEncodingException;

import ICTCLAS.I3S.AC.ICTCLAS50;

/**
 * Small driver that segments one sentence with ICTCLAS50 twice: once before
 * and once after importing a user dictionary, printing both results so they
 * can be compared.
 */
public class Test_userdic {

    /**
     * Runs the before/after user-dictionary comparison.
     *
     * @param args command-line arguments (unused)
     * @throws UnsupportedEncodingException if the "UTF-8" charset is unavailable
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        ICTCLAS50 ictclas = new ICTCLAS50();

        // Initialise the segmenter from the current directory.
        String argu = ".";
        if (!ictclas.ICTCLAS_Init(argu.getBytes("UTF-8"))) {
            System.err.println("Initail fail!");
            return;
        }

        // From here on the segmenter is initialised, so make sure it is
        // released even if one of the calls below throws.
        try {
            System.out.println("Initial success!");

            // NOTE(review): this listing appears to be machine-translated; the
            // original input was presumably a Chinese sentence - confirm.
            String input = "The Ictclas of Chinese lexical analysis system has been developed"
                    + " on the basis of many years' research work in the Institute of Computing"
                    + " Technology of CAs. Tens of thousands of scientists";

            // Segmentation before any user dictionary is imported.
            System.out.println(input);
            ictclas.ICTCLAS_SetPOSmap(ictclas.PKU_POS_MAP_FIRST);
            String result = segment(ictclas, input);
            System.out.println("Word breaker for user dictionary not imported is: \t" + result);

            // Import the user dictionary.
            String userDir = "UserDict.txt"; // user dictionary path
            // The path is encoded with the platform default charset, as in the
            // original listing.
            byte[] userDirBytes = userDir.getBytes();
            // NOTE(review): the second argument is the dictionary file's encoding
            // code as defined by ICTCLAS (3 is presumably UTF-8 - confirm against
            // the ICTCLAS50 API documentation and the file's actual encoding).
            int count = ictclas.ICTCLAS_ImportUserDictFile(userDirBytes, 3);
            System.out.println("\n the number of imported user words: \t" + count);

            // Segmentation after the user dictionary has been imported.
            String resultAfterImport = segment(ictclas, input);
            System.out.println("Word breaker results after importing user dictionaries: \t"
                    + resultAfterImport);
        } finally {
            // Exit and release the segmenter's resources.
            ictclas.ICTCLAS_Exit();
        }
    }

    /**
     * Segments {@code text} with the given segmenter and decodes the returned
     * bytes as UTF-8. The flags passed to the segmenter (0, 1) are the same
     * ones the original listing used for both runs.
     */
    private static String segment(ICTCLAS50 ictclas, String text)
            throws UnsupportedEncodingException {
        byte[] nativeBytes = ictclas.ICTCLAS_ParagraphProcess(text.getBytes("UTF-8"), 0, 1);
        return new String(nativeBytes, 0, nativeBytes.length, "UTF-8");
    }
}
The user dictionary is as follows:
Zhouqu County@@ZQXC
Overnight@@LY
Chinese Academy of Sciences @@v
Work @@t
Research@@nb
Guo Ke @t
Vanke @y
The results are as follows:
Initial success!
Based on the accumulation of many years of research work, the Computational Technology Institute of CAS has developed a ictclas of Chinese lexical analysis system. Tens of thousands of scientists
/w thousand/m million/m scientist/N
Number of imported user words: 7
/w thousand/m million/m scientist/N
No change!
I have seen it said online that words from the user dictionary take priority. For example, one post complained: "2. The priority of user-dictionary words seems too high. I added the word "Vanke" to the user dictionary, and the test sentence "tens of thousands of scientists" was then split into "thousand / Vanke / learning / home"."
So why has my segmentation result not changed at all?
Chinese Academy of Sciences ICTCLAS word segmenter: why are the segmentation results unchanged after importing a user dictionary?