def trainNB0(trainmatrix, traincategory):
    """Train a two-class naive Bayes model on word-occurrence vectors.

    Parameters
    ----------
    trainmatrix : sequence of equal-length numeric sequences
        One row per training document; each entry is the occurrence
        count (or 0/1 flag) of the corresponding vocabulary word.
    traincategory : sequence of int
        One label per document. Documents labelled ``1`` are counted for
        class 1; every other label is counted for class 0.

    Returns
    -------
    (p0vect, p1vect, pabusive)
        ``p0vect`` / ``p1vect`` are arrays of log conditional word
        probabilities for class 0 / class 1, and ``pabusive`` is
        ``sum(traincategory) / number_of_documents`` (the class-1 prior
        when labels are 0/1).

    Raises
    ------
    ValueError
        If ``trainmatrix`` is empty or not two-dimensional, or if the number
        of labels does not match the number of documents.
    """
    import numpy as np

    docs = np.asarray(trainmatrix, dtype=float)
    labels = np.asarray(traincategory)
    if docs.ndim != 2 or docs.shape[0] == 0:
        raise ValueError("trainmatrix must be a non-empty 2-D matrix")
    numtraindocs = docs.shape[0]
    if labels.shape[0] != numtraindocs:
        raise ValueError("traincategory must have one label per document")

    pabusive = labels.sum() / float(numtraindocs)

    is_class1 = labels == 1
    class1_docs = docs[is_class1]
    class0_docs = docs[~is_class1]

    # Laplace smoothing: word counts start at 1 and denominators at 2.0 so
    # that a word unseen in one class never yields a zero probability.
    p1num = 1.0 + class1_docs.sum(axis=0)
    p0num = 1.0 + class0_docs.sum(axis=0)
    p1denom = 2.0 + class1_docs.sum()
    p0denom = 2.0 + class0_docs.sum()

    # Work in log space so that multiplying many small probabilities at
    # classification time becomes a sum and cannot underflow.
    p1vect = np.log(p1num / p1denom)
    p0vect = np.log(p0num / p0denom)
    return p0vect, p1vect, pabusive
def classifynb(Vec2classify, P0vec, P1vec, PClass1):
    """Classify one word-occurrence vector with a trained naive Bayes model.

    Parameters
    ----------
    Vec2classify : sequence of numbers
        Word-occurrence vector of the document to classify.
    P0vec, P1vec : sequence of float
        Log conditional word probabilities for class 0 and class 1
        (same length as ``Vec2classify``).
    PClass1 : float
        Prior probability of class 1; class 0 uses ``1.0 - PClass1``.

    Returns
    -------
    int
        ``1`` if the class-1 log score is strictly greater than the class-0
        log score, otherwise ``0`` (ties go to class 0).
    """
    import numpy as np

    vec = np.asarray(Vec2classify, dtype=float)
    # Element-wise product selects the log-probabilities of the words that
    # are present; summing them gives the log-likelihood of the document.
    p1 = np.sum(vec * np.asarray(P1vec, dtype=float)) + np.log(PClass1)
    p0 = np.sum(vec * np.asarray(P0vec, dtype=float)) + np.log(1.0 - PClass1)
    return 1 if p1 > p0 else 0
Below is a simple C# example that uses naive Bayes to classify articles by type.
1, create word vector: Medium super/AFC Crown/replace him/FA/Premier League/La Liga/Champions League/Serie A/Bundesliga/basketball/nba/cba/Golf/Ping pong/volleyball/Tennis/Badminton/running/racing/chess/billiards/swimming/Equestrian/Boxing/Athletics/kungfu/Poker/Sports/team/player/training/national team/ League/Club/Venue/comeback/lore/warm-up/team-mates/champions/runner-up/third place/fouls/season/overtime/overtake/halftime/scramble/tactics/lineup/Match/Dobbin/recovery/goal/Lost/Oscar/Entertainment/fans/film/TV/Music/drama/video/actor/director/star/Broker/singer /TV Series/screenings/fans/photo/acting/show/show/Celebrity/supermodel/actress/model/star/sexy/creative/cinema/film/filming/screenwriter/plot/video/plot/starring/Show/box office/Power-up/Drama/performance/program/trailer/moderator/Emmy Award/character/Theatre/fans/fans/ Performance/album/Music/Theatre/art/Ballet/drama/Dance/Military/Army/warplanes/bombs/military/tanks/warships/bombing/drills/combat readiness/troops/Military district/Defense/soldiers/ships/submarines/aircraft/helicopters/fleet/defense/maneuvers/weapons/counterattack/strike/military parade/confrontation/defence/Navy/ Air Force/Army/armed/strategic/air raid/conflict/armor/infantry/combat/Missile/Border/reconnaissance/fighter/radar/bombing/defense/stronghold/Firepower/aircraft carrier/offensive/ammunition/Military camp/Siege/Conquest/Captive/War/ally/Battle/Invasion
2. Download ten articles of each of the three types from Sohu as training samples, compute the document (word-occurrence) vector of each article to form the document matrix, and label each article with its category.
Document Matrix:
0000000000000000001000000000000000000011000100010010100000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000100000000000001000000000000000111100010100000000000000000110000001100000000001000000000000000000 10000000000000000000000000000000000000000000000
0000000000000000000000000000110000000000000000000000010010000010010000000010000000000000000000000100000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000010010000000000000000010000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000100000000000000000000000000000100100001000000000000000100100000010000000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000100000000100000100000101000000001111111111100000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000001100000000000000110100000010000100000000000011000011100000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000001000000000000000000000000000000000000000000000010100001100000000000000001000000011010000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000100000100000000000000010000000011000001000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000010100001100000000000000000000010110000100001100000000000000000000 00000000000000000000000000000000000000000000000
0000000100000000000000000000111000000010000101100010010000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000100000000000000000000000
0000000000000000000000000000000000000000000000000000000010010001000000000000000000000000100001000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111100000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000001 11111111111100000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000 00000000000100000000000000000000000000000000000
0000000000000000000000000000000000000000000000010000000000010000000000000000000000001000000000000000000000000001001000000 10010000000000000000100000000000100000000000010
0000000000000000000000000000001000000000000000000000000000010000000000000000000000000000000000000000000000000001000100000 10000000000000000000100000100000000000000000000
0000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000001 10010000000001001010000000010000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000101000 00000000100000000010000000000000001000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000 10000000000100000000100010000000000001000000000
0000000100000000000000000001110011000000000100000010010000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000 00100000000000000100000000110000010000000000000
1100000000000000000000000001000010001000000100000000010000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
1100000000000000000000000001110011001001000100011110110000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
0000000000010000000000000000010000001010001001100010000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
0000000000000100000000000000000000000011010000010010000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000100000000000000000100000000000000100100010000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000010000000000000001111000001010001101000010001000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
0000000000000000000000000001000000000001100101000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
Category tag vector:
122222222212333333333131111111
using System;
using System.IO;
using System.Text;
using System.Windows.Forms;

// NOTE(review): identifier casing in this file was restored to the Visual
// Studio defaults (namespace NaiveBayes, label1..label4, textBox1,
// button1_Click, button2_Click). Confirm these match Form1.Designer.cs.
namespace NaiveBayes
{
    /// <summary>
    /// Three-class naive Bayes article classifier (1 = sports,
    /// 2 = entertainment, 3 = military) driven by a fixed vocabulary list.
    /// </summary>
    public partial class Form1 : Form
    {
        // Vocabulary words read from VocabList.txt ('/'-separated).
        private string[] vocabArray;

        // Per-class word scores. During training they hold smoothed word
        // counts; after training they hold log conditional probabilities.
        // They stay null until button1_Click has run.
        private double[] p0Num, p1Num, p2Num;

        /// <summary>
        /// Builds the form and loads the vocabulary from "VocabList.txt".
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            label2.Text = "Sports 1, Entertainment 2, military 3\r\n 10 training samples per type \r\n Article all from Sohu News \r\n Word vector from all kinds of articles to obtain the word";
            vocabArray = ReadAllTextWithoutLineBreaks("VocabList.txt")
                .Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>Pins the window size to 800x600 whenever it is resized.</summary>
        private void Form1_Resize(object sender, EventArgs e)
        {
            this.Width = 800;
            this.Height = 600;
        }

        /// <summary>
        /// Trains the model: builds the document matrix and category label
        /// vector from the *.txt files in "Train", appends both to files in
        /// "trainV", and computes the per-class log word probabilities.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Generate document matrix and category label vector.
            DirectoryInfo trainDir = new DirectoryInfo("Train");
            FileInfo[] files = trainDir.GetFiles("*.txt");
            string[] trainMatrix = new string[files.Length];
            StringBuilder trainCategory = new StringBuilder();

            for (int m = 0; m < files.Length; m++)
            {
                string text = ReadAllTextWithoutLineBreaks(files[m].FullName);
                trainMatrix[m] = Vectorize(text);
                // The category is the single character that follows the last
                // '_' in the file name.
                string name = files[m].Name;
                trainCategory.Append(name.Substring(name.LastIndexOf("_") + 1, 1));
            }

            // Persist the training data. The files are opened in append mode,
            // so repeated training runs accumulate lines in them.
            Directory.CreateDirectory("trainV");
            using (StreamWriter sw = new StreamWriter(".\\trainV\\trainMatrix.txt", true))
            {
                foreach (string row in trainMatrix)
                {
                    sw.WriteLine(row);
                }
            }
            using (StreamWriter sw = new StreamWriter(".\\trainV\\trainCategory.txt", true))
            {
                sw.WriteLine(trainCategory.ToString());
            }

            // Laplace smoothing: counts start at 1.0 and denominators at 2.0.
            p0Num = NewFilledArray(vocabArray.Length, 1.0);
            p1Num = NewFilledArray(vocabArray.Length, 1.0);
            p2Num = NewFilledArray(vocabArray.Length, 1.0);
            double p0Denom = 2.0;
            double p1Denom = 2.0;
            double p2Denom = 2.0;

            for (int i = 0; i < trainMatrix.Length; i++)
            {
                switch (trainCategory[i])
                {
                    case '1':
                        p0Denom += AddWordCounts(trainMatrix[i], p0Num);
                        break;
                    case '2':
                        p1Denom += AddWordCounts(trainMatrix[i], p1Num);
                        break;
                    case '3':
                        p2Denom += AddWordCounts(trainMatrix[i], p2Num);
                        break;
                    default:
                        // Unknown label: the sample is ignored.
                        break;
                }
            }

            // Convert counts to log probabilities so classification can sum
            // scores instead of multiplying small probabilities.
            for (int j = 0; j < vocabArray.Length; j++)
            {
                p0Num[j] = Math.Log(p0Num[j] / p0Denom);
                p1Num[j] = Math.Log(p1Num[j] / p1Denom);
                p2Num[j] = Math.Log(p2Num[j] / p2Denom);
            }

            label4.Text = "Processing sample data Complete";
        }

        /// <summary>
        /// Classifies the text in textBox1 and shows the per-class scores and
        /// the winning category. Does nothing when the text box is blank.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (textBox1.Text.Trim() == "")
            {
                return;
            }
            if (p0Num == null || p1Num == null || p2Num == null)
            {
                // Training has not run yet; the score arrays do not exist.
                label1.Text = "Please process the sample data first";
                return;
            }

            string strVec = Vectorize(textBox1.Text);
            double p0 = 0;
            double p1 = 0;
            double p2 = 0;
            for (int j = 0; j < vocabArray.Length; j++)
            {
                if (strVec[j] == '1')
                {
                    p0 += p0Num[j];
                    p1 += p1Num[j];
                    p2 += p2Num[j];
                }
            }

            string catelog;
            if (p0 > p1 && p0 > p2) catelog = "Sport";
            else if (p1 > p0 && p1 > p2) catelog = "Entertainment";
            else if (p2 > p0 && p2 > p1) catelog = "Military";
            else catelog = "cannot be judged"; // tie between the top scores

            label3.Text = "Sport:" + p0.ToString() + "\r\n Entertainment:" + p1.ToString() + "\r\n Military:" + p2.ToString();
            label1.Text = "The owning type is:" + catelog;
        }

        /// <summary>
        /// Reads a text file with the system default encoding and joins its
        /// lines with no separator between them.
        /// </summary>
        private static string ReadAllTextWithoutLineBreaks(string path)
        {
            return string.Concat(File.ReadAllLines(path, Encoding.Default));
        }

        /// <summary>
        /// Returns a string of '1'/'0' characters, one per vocabulary word,
        /// marking whether that word occurs in <paramref name="text"/>.
        /// </summary>
        private string Vectorize(string text)
        {
            StringBuilder bits = new StringBuilder(vocabArray.Length);
            foreach (string word in vocabArray)
            {
                bits.Append(text.Contains(word) ? '1' : '0');
            }
            return bits.ToString();
        }

        /// <summary>
        /// Adds the '1' flags of one document row to the class word counts and
        /// returns how many flags were set in that row.
        /// </summary>
        private static double AddWordCounts(string row, double[] wordCounts)
        {
            double total = 0;
            for (int j = 0; j < wordCounts.Length; j++)
            {
                if (row[j] == '1')
                {
                    wordCounts[j] += 1.0;
                    total += 1.0;
                }
            }
            return total;
        }

        /// <summary>Creates an array of the given length filled with one value.</summary>
        private static double[] NewFilledArray(int length, double value)
        {
            double[] result = new double[length];
            for (int i = 0; i < length; i++)
            {
                result[i] = value;
            }
            return result;
        }
    }
}
A C# implementation of the naive Bayes classifier from "Machine Learning in Action"