/**
 * Lucene analyzer for tweets that indexes each word by its Double Metaphone key.
 *
 * <p>The Double Metaphone encoder produces the same key for words that sound
 * similar, so differently spelled variants of a word collapse onto one term.
 *
 * @author yangxin
 */
package unittwelve;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class TwitterAnalyzer extends Analyzer {

    /** Phonetic encoder applied to every token that survives filtering. */
    private final DoubleMetaphone filter = new DoubleMetaphone();

    /**
     * Tokenizes the input, drops stop words, stems what is left and replaces
     * each resulting term with its Double Metaphone key.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @param reader    source of the text to analyze
     * @return a whitespace-tokenized stream over the space-separated phonetic keys
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
        // Pipeline: standard tokenizer -> stop-word removal -> Porter stemming.
        TokenStream result = new PorterStemFilter(
                new StopFilter(
                        true,
                        new StandardTokenizer(Version.LUCENE_CURRENT, reader),
                        StandardAnalyzer.STOP_WORDS_SET));
        TermAttribute termAtt = result.addAttribute(TermAttribute.class);

        StringBuilder buf = new StringBuilder();
        try {
            while (result.incrementToken()) {
                // termBuffer() exposes the raw char[]; only the first
                // termLength() characters belong to the current term.
                String word = new String(termAtt.termBuffer(), 0, termAtt.termLength());
                // Each word is encoded once. The single-space separator is
                // required so the WhitespaceTokenizer below can split the keys.
                buf.append(filter.encode(word)).append(" ");
            }
        } catch (IOException e) {
            // Best effort: report the failure and still return the keys
            // collected before the error occurred.
            e.printStackTrace();
        }
        return new WhitespaceTokenizer(new StringReader(buf.toString()));
    }
}
Lucene Analyzer class for tweet optimization