Today we implement a simple tokenizer (analyzer) as a demo, with the following features:
1. Split text into tokens on spaces, hyphens, and periods;
2. Support synonym queries for "hi" and "hello";
3. Highlight the synonyms "hi" and "hello" in search results.
Myanalyzer implementation code:
public class Myanalyzer extends Analyzer {
private int analyzertype;
public myanalyzer (int type) {
super ();
Analyzertype = type;
}
@Override
protected tokenstreamcomponents createcomponents (String fieldName, Reader Reader) {
Mytokenizer Tokenizer = new Mytokenizer (fieldName, Reader, analyzertype);
return new tokenstreamcomponents (Tokenizer);
}
}
Mytokenizer implementation code:
public class Mytokenizer extends Tokenizer {public class wordunit{Wordunit (String Word, int start, int length) {th
Is.word = Word;
This.start = start;
this.length = length;
System.out.println ("\twordunit:" + Word + "|" + Start + "|" + length);
} String Word;
int start;
int length;
} private int analyzertype;
private int endposition;
Private iterator<wordunit> it;
private arraylist<wordunit> words;
Private final Chartermattribute Termatt;
Private final Offsetattribute Offsetatt;
Public Mytokenizer (String FieldName, Reader in, int type) {super ' (in);
it = null;
endposition = 0;
Analyzertype = type;
Offsetatt = AddAttribute (Offsetattribute.class);
Termatt = AddAttribute (Chartermattribute.class);
AddAttribute (Payloadattribute.class);
} @Override public Boolean Incrementtoken () throws IOException {clearattributes ();
char[] Inputbuf = new char[1024];
if (it = = null) {int bufSize = Input.read (INPUTBUF);if (bufSize <= 0) return false;
int beginindex = 0;
int endIndex = 0;
Words = new arraylist<wordunit> (); for (endIndex = 0; EndIndex < bufSize; endindex++) {if (Inputbuf[endindex]! = '-' && inputbuf[endindex]! = ' ' && inputbuf[endindex]! = '. ')
Continue
Addword (Inputbuf, Beginindex, EndIndex);
Beginindex = EndIndex + 1;
} addword (Inputbuf, Beginindex, EndIndex);//add the Last if (Words.isempty ()) return false;
it = Words.iterator ();
} if (it! = null && it.hasnext ()) {Wordunit word = it.next ();
Termatt.append (Word.word);
Termatt.setlength (Word.word.length ());
Endposition = Word.start + word.length;
Offsetatt.setoffset (Word.start, endposition);
return true;
} return false;
} @Override public void Reset () throws IOException {Super.reset ();
it = null;
endposition = 0;
} @Override public final void end () {int finaloffset = Correctoffset (this.endposition);Offsetatt.setoffset (Finaloffset, Finaloffset);
} private void Addword (char[] inputbuf, int begin, int end) {if (end <= begin) return;
String word = new string (Inputbuf, begin, End-begin);
Words.add (New Wordunit (Word, begin, End-begin));
if (Analyzertype = = 0 && word.equals ("HI")) Words.add (New Wordunit ("Hello", begin, 2));
if (Analyzertype = = 0 && word.equals ("Hello")) Words.add (New Wordunit ("HI", begin, 5));
}
}
When indexing, use analyzer type 0 (synonym tokens are added);
When searching, use analyzer type 1 (no synonym tokens);
When highlighting, use analyzer type 0 (synonym tokens are added).
Searching for "hello" produces the following result:
Score Doc 0 Hightlight To:look <em>hello</em> on
score Doc 1 hightlight to:i am <em>hi</em> China Chinese
As you can see, the document containing "hi" is also found, and "hi" is highlighted.