Java Chinese word segmentation tool (3)
Import java. io. EOFException; import java. io. file; import java. io. fileNotFoundException; import java. io. IOException; import java. io. randomAccessFile; import java. util. arrayList;/** Input file format: text that has already been segmented, with words separated by spaces, line breaks, or other whitespace characters. * The input ends at the end of the file. * Suitable for reading text that contains very long lines, because the buffer holds a batch of words (less than a line) rather than a whole line. * Implementation: each read fills the buffer with a batch of words, treated as one "sentence", by reading the file one byte at a time; whitespace characters mark the beginning and end of each word. **/Public class WordReader {RandomAccessFile raf = null; ArrayList
Sentence = null; int senSize = 1000; int senPos = 0; public WordReader (String fileName) throws IOException {File file File = new file (fileName); raf = new RandomAccessFile (File, r); sentence = new ArrayList
();} Public String [] getNextWords (int count) throws IOException {if (senPos + count> = sentence. size () // At the end of the paragraph, read the new paragraph {if (readSentence () return getNextWords (count); else return null ;} string [] words = new String [count]; for (int I = 0; I