// Training the model
// Read the corpus and the dictionary used for training
// Training data as <polarity, text> pairs; polarity -1 marks text with a negative tendency.
List<Pair<Integer, String>> lines = new LinkedList<Pair<Integer, String>>();
// Feature word -> 1-based feature id, kept in dictionary order.
Map<String, Integer> features = new LinkedHashMap<String, Integer>();

// Corpus processing: every line is expected to look like "<label>\t<text>".
// NOTE(review): the constructor arguments were garbled in the original listing; "Corpus" and
// "dict" are treated as file paths here and the platform default charset is used — confirm both.
Scanner getCorpus = new Scanner(new BufferedInputStream(new FileInputStream(new File("Corpus"))));
try {
    while (getCorpus.hasNextLine()) {
        String[] fields = getCorpus.nextLine().trim().split("\t");
        if (fields.length < 2) {
            // Blank or malformed line: there is no text column to train on, so skip it
            // instead of failing with an ArrayIndexOutOfBoundsException.
            continue;
        }
        // Anything that is not "negative" is labelled +1; more classes can be added here as needed.
        lines.add(new Pair<Integer, String>(fields[0].equals("negative") ? -1 : 1, fields[1]));
    }
} finally {
    getCorpus.close();
}

// Dictionary processing: the n-th dictionary line receives id n (ids start at 1).
// A word that appears more than once keeps the id of its last occurrence.
// NOTE(review): the loading code used at prediction time lower-cases words and keeps the FIRST
// occurrence, so ids only agree when the dictionary is lower-case and duplicate-free — verify.
int maxId = 0;
Scanner getDict = new Scanner(new BufferedInputStream(new FileInputStream(new File("dict"))));
try {
    while (getDict.hasNextLine()) {
        features.put(getDict.nextLine().trim(), ++maxId);
    }
} finally {
    getDict.close();
}

// Per-feature occurrence counters, indexed directly by feature id. Ids run from 1 to maxId,
// so the array needs maxId + 1 slots (features.size() slots would overflow on the last id).
// A freshly allocated int[] is already zeroed.
int[] cnt = new int[maxId + 1];

for (Pair<Integer, String> pr : lines) {
    int polarity = pr.getFirst();
    String phrase = pr.getSecond();
    double sum = 0;
    // Each returned pair is used below as a [begin, end) substring range of a feature in the phrase.
    List<Pair<Integer, Integer>> featurePos = StringUtil.featureExtract(phrase, features, 10, 1);
    // TreeSet keeps the ids sorted, so they are written out in ascending order.
    Set<Integer> num = new TreeSet<Integer>();
    for (Pair<Integer, Integer> pos : featurePos) {
        String feature = phrase.substring(pos.getFirst(), pos.getSecond());
        int id = features.get(feature);
        // Adds the count seen so far for this feature, then bumps the count.
        sum += cnt[id];
        cnt[id]++;
        num.add(id);
    }

    // File output: one line per sample, "<label> <id>:<value> <id>:<value> ...".
    if (!featurePos.isEmpty()) {
        putProblem.printf("%d", polarity);
        for (Integer id : num) {
            // NOTE(review): sum stays 0 when no feature occurs twice in the phrase, which makes
            // this value Infinity. The divisor is left unchanged so that training keeps matching
            // the prediction-side computation; revisit the normalization in both places together.
            putProblem.printf(" %d:%.6f", id, cnt[id] / Math.sqrt(sum));
            cnt[id] = 0; // reset so the counters can be reused for the next sample
        }
        putProblem.println();
    }
}
// Make sure everything written so far has reached the problem file before it is read back.
putProblem.flush();

// Train a logistic-regression model (solver L2R_LR, with 10 and 0.002 as the remaining
// constructor arguments) on the generated problem file and save it.
Parameter param = new Parameter(SolverType.L2R_LR, 10, 0.002);
Linear.train(Train.readProblem(new File(problem), -1), param).save(new File(model));
// End of training: the model file has been produced.
// Load the feature words
// Feature word -> 1-based feature id, in order of first appearance in the resource.
Map<String, Integer> featureMap = new HashMap<String, Integer>();
InputStream inputStream = conf.getConfResourceAsInputStream("path://");
if (inputStream == null) {
    // Fail with a clear message instead of a NullPointerException inside InputStreamReader.
    throw new IllegalStateException("Feature word resource not found: path://");
}
// NOTE(review): the reader uses the platform default charset — confirm it matches the resource.
BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
try {
    // Load the features: one word per line, normalized to lower case and trimmed.
    String line;
    int index = 0;
    while ((line = br.readLine()) != null) {
        line = line.toLowerCase().trim();
        // Only the first occurrence of a word gets an id, so ids are consecutive from 1.
        if (!featureMap.containsKey(line)) {
            featureMap.put(line, ++index);
        }
    }
} finally {
    // Release both handles even when reading fails part-way through.
    try {
        br.close();
    } finally {
        inputStream.close();
    }
}
// Build the feature vector
List<String> kws;
// Per-feature occurrence counters indexed by feature id; allocated on first use below.
private int[] cnt = null;
if (cnt == null) {
    // Ids stored in featureMap start at 1, so one extra slot is needed. The original called
    // Arrays.fill on the null array, which always throws a NullPointerException.
    cnt = new int[featureMap.size() + 1];
}
// No explicit zero-fill is required: a new int[] starts zeroed and every touched slot is
// reset at the end of this block.
ArrayList<Feature> features = new ArrayList<Feature>();
// Obtain the keywords of the content (per the original note: forward-matching and
// inverse-matching segmentation).
// NOTE(review): the casing of StrAlg / content was garbled in the original listing — confirm.
kws = StrAlg.content(content);
int sum = 0;
// TreeSet keeps the ids sorted, so the feature nodes are appended in ascending id order.
Set<Integer> num = new TreeSet<Integer>();
for (String kw : kws) {
    Integer id = featureMap.get(kw);
    if (id != null) { // keyword is a known feature word
        // Adds the count seen so far for this feature, then bumps the count.
        sum += cnt[id];
        cnt[id]++;
        num.add(id);
    }
}
for (Integer id : num) {
    // NOTE(review): sum stays 0 when no keyword occurs twice, which makes this value Infinity.
    // The divisor is left unchanged so it keeps matching the training-side computation;
    // revisit the normalization in both places together.
    features.add(new FeatureNode(id, cnt[id] / Math.sqrt(sum)));
    cnt[id] = 0; // reset so the counters can be reused for the next document
}
// Predict the class using the trained model