Machine Learning in Action > A C# implementation of naive Bayes

def trainNB0(trainmatrix, traincategory):
    """Train a two-class naive Bayes model on word-occurrence vectors.

    Parameters
    ----------
    trainmatrix : sequence of equal-length numeric vectors
        One row per training document; entry j is the count/flag for
        vocabulary word j.
    traincategory : sequence of int
        Class label for each row of ``trainmatrix`` (1 = class 1,
        anything else = class 0).

    Returns
    -------
    (p0vect, p1vect, pabusive)
        ``p0vect`` / ``p1vect`` are arrays of log conditional word
        probabilities for class 0 / class 1, and ``pabusive`` is the
        fraction of training documents labelled 1 (the class-1 prior).

    Relies on ``ones`` and ``log`` from numpy being in scope.
    """
    numtraindocs = len(trainmatrix)
    numwords = len(trainmatrix[0])
    pabusive = sum(traincategory) / float(numtraindocs)

    # Start the word counts at 1 and the totals at 2.0 so that a word never
    # seen in a class does not get probability zero.
    p0num = ones(numwords)
    p1num = ones(numwords)
    p0denom = 2.0
    p1denom = 2.0

    for i in range(numtraindocs):
        if traincategory[i] == 1:
            p1num += trainmatrix[i]
            p1denom += sum(trainmatrix[i])
        else:
            p0num += trainmatrix[i]
            p0denom += sum(trainmatrix[i])

    # Work in log space so that multiplying many small probabilities later
    # becomes a sum and does not underflow.
    p1vect = log(p1num / p1denom)
    p0vect = log(p0num / p0denom)
    return p0vect, p1vect, pabusive

def classifynb(Vec2classify, P0vec, P1vec, PClass1):
    """Classify a word vector with a trained two-class naive Bayes model.

    Parameters
    ----------
    Vec2classify : numpy array
        Word-occurrence vector of the document to classify.
    P0vec, P1vec : numpy array
        Log conditional word probabilities for class 0 and class 1.
    PClass1 : float
        Prior probability of class 1.

    Returns
    -------
    int
        1 if the class-1 log score is strictly greater than the class-0
        log score, otherwise 0 (ties go to class 0).

    Relies on ``log`` from numpy being in scope.
    """
    # Element-wise multiplication picks the log-probabilities of the words
    # that are present; summing them gives the log-likelihood, to which the
    # log prior of each class is added.
    p1 = sum(Vec2classify * P1vec) + log(PClass1)
    p0 = sum(Vec2classify * P0vec) + log(1.0 - PClass1)
    return 1 if p1 > p0 else 0

Below is a simple C# example that uses naive Bayes to classify articles by type.

1, create word vector: Medium super/AFC Crown/replace him/FA/Premier League/La Liga/Champions League/Serie A/Bundesliga/basketball/nba/cba/Golf/Ping pong/volleyball/Tennis/Badminton/running/racing/chess/billiards/swimming/Equestrian/Boxing/Athletics/kungfu/Poker/Sports/team/player/training/national team/ League/Club/Venue/comeback/lore/warm-up/team-mates/champions/runner-up/third place/fouls/season/overtime/overtake/halftime/scramble/tactics/lineup/Match/Dobbin/recovery/goal/Lost/Oscar/Entertainment/fans/film/TV/Music/drama/video/actor/director/star/Broker/singer /TV Series/screenings/fans/photo/acting/show/show/Celebrity/supermodel/actress/model/star/sexy/creative/cinema/film/filming/screenwriter/plot/video/plot/starring/Show/box office/Power-up/Drama/performance/program/trailer/moderator/Emmy Award/character/Theatre/fans/fans/ Performance/album/Music/Theatre/art/Ballet/drama/Dance/Military/Army/warplanes/bombs/military/tanks/warships/bombing/drills/combat readiness/troops/Military district/Defense/soldiers/ships/submarines/aircraft/helicopters/fleet/defense/maneuvers/weapons/counterattack/strike/military parade/confrontation/defence/Navy/ Air Force/Army/armed/strategic/air raid/conflict/armor/infantry/combat/Missile/Border/reconnaissance/fighter/radar/bombing/defense/stronghold/Firepower/aircraft carrier/offensive/ammunition/Military camp/Siege/Conquest/Captive/War/ally/Battle/Invasion

2. Download 10 articles of each of the three types from Sohu to form the training samples, compute the word vector of each article (one row of the document matrix), and label each article with its category.

Document Matrix:

0000000000000000001000000000000000000011000100010010100000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000100000000000001000000000000000111100010100000000000000000110000001100000000001000000000000000000 10000000000000000000000000000000000000000000000
0000000000000000000000000000110000000000000000000000010010000010010000000010000000000000000000000100000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000010010000000000000000010000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000100000000000000000000000000000100100001000000000000000100100000010000000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000100000000100000100000101000000001111111111100000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000001100000000000000110100000010000100000000000011000011100000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000001000000000000000000000000000000000000000000000010100001100000000000000001000000011010000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000100000100000000000000010000000011000001000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000010100001100000000000000000000010110000100001100000000000000000000 00000000000000000000000000000000000000000000000
0000000100000000000000000000111000000010000101100010010000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000100000000000000000000000
0000000000000000000000000000000000000000000000000000000010010001000000000000000000000000100001000001000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111100000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000001 11111111111100000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000 00000000000100000000000000000000000000000000000
0000000000000000000000000000000000000000000000010000000000010000000000000000000000001000000000000000000000000001001000000 10010000000000000000100000000000100000000000010
0000000000000000000000000000001000000000000000000000000000010000000000000000000000000000000000000000000000000001000100000 10000000000000000000100000100000000000000000000
0000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000001 10010000000001001010000000010000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000101000 00000000100000000010000000000000001000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000 10000000000100000000100010000000000001000000000
0000000100000000000000000001110011000000000100000010010000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000 00100000000000000100000000110000010000000000000
1100000000000000000000000001000010001000000100000000010000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
1100000000000000000000000001110011001001000100011110110000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
0000000000010000000000000000010000001010001001100010000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
0000000000000100000000000000000000000011010000010010000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000100000000000000000100000000000000100100010000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000
0000000000010000000000000001111000001010001101000010001000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000001000000000
0000000000000000000000000001000000000001100101000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000

Category label vector:

122222222212333333333131111111

using System;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace naivebayes
{
    /// <summary>
    /// Three-class naive Bayes article classifier (1 = sports,
    /// 2 = entertainment, 3 = military).
    /// Button 1 trains the model from the text files in the "Train" folder;
    /// button 2 classifies the text typed into the text box.
    /// </summary>
    /// <remarks>
    /// NOTE(review): control and handler names (label1..label4, textBox1,
    /// button1_Click, button2_Click) follow the WinForms designer defaults;
    /// confirm they match the designer-generated half of this partial class.
    /// </remarks>
    public partial class Form1 : Form
    {
        // Vocabulary: one entry per feature, loaded from VocabList.txt.
        private string[] vocabArray;

        // Per-class log conditional word probabilities; null until trained.
        private double[] p0Num, p1Num, p2Num;

        /// <summary>
        /// Initializes the form and loads the '/'-separated vocabulary list.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            label2.Text = "Sports 1, Entertainment 2, military 3\r\n 10 training samples per type \r\n Article all from Sohu News \r\n Word vector from all kinds of articles to obtain the word";
            vocabArray = ReadConcatenatedLines("VocabList.txt")
                .Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>
        /// Reads a text file and returns all of its lines joined together
        /// with no separator. The reader is always disposed.
        /// </summary>
        private static string ReadConcatenatedLines(string path)
        {
            StringBuilder all = new StringBuilder();
            using (StreamReader sr = new StreamReader(path, Encoding.Default))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    all.Append(line);
                }
            }
            return all.ToString();
        }

        /// <summary>
        /// Builds the 0/1 word vector of a text: character j is '1' when the
        /// text contains vocabulary word j, otherwise '0'.
        /// </summary>
        private string BuildWordVector(string text)
        {
            StringBuilder vec = new StringBuilder(vocabArray.Length);
            foreach (string word in vocabArray)
            {
                vec.Append(text.Contains(word) ? '1' : '0');
            }
            return vec.ToString();
        }

        /// <summary>Keeps the window at a fixed 800x600 size.</summary>
        private void Form1_Resize(object sender, EventArgs e)
        {
            this.Width = 800;
            this.Height = 600;
        }

        /// <summary>
        /// Trains the model: builds the document matrix and the category
        /// label vector from the files in "Train", appends both to files
        /// under "trainV", and computes the per-class log probabilities.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Generate the document matrix and the category label vector.
            FileInfo[] files = new DirectoryInfo("Train").GetFiles("*.txt");
            string[] trainMatrix = new string[files.Length];
            StringBuilder categoryBuilder = new StringBuilder(files.Length);

            for (int m = 0; m < files.Length; m++)
            {
                trainMatrix[m] = BuildWordVector(ReadConcatenatedLines(files[m].FullName));

                // The category is the single character that follows the last
                // '_' in the file name.
                string name = files[m].Name;
                categoryBuilder.Append(name.Substring(name.LastIndexOf("_") + 1, 1));
            }
            string trainCategory = categoryBuilder.ToString();

            // Make sure the output folder exists before appending to it.
            Directory.CreateDirectory("trainV");
            using (StreamWriter sw = new StreamWriter(".\\trainV\\trainMatrix.txt", true))
            {
                foreach (string row in trainMatrix)
                {
                    sw.WriteLine(row);
                }
            }
            using (StreamWriter sw = new StreamWriter(".\\trainV\\trainCategory.txt", true))
            {
                sw.WriteLine(trainCategory);
            }

            // Word counts start at 1 and totals at 2.0 so that an unseen
            // word never yields a zero probability.
            const int classCount = 3;
            int vocabSize = vocabArray.Length;
            double[][] num = new double[classCount][];
            double[] denom = new double[classCount];
            for (int k = 0; k < classCount; k++)
            {
                num[k] = new double[vocabSize];
                for (int j = 0; j < vocabSize; j++)
                {
                    num[k][j] = 1.0;
                }
                denom[k] = 2.0;
            }

            for (int i = 0; i < trainMatrix.Length; i++)
            {
                // Labels '1', '2', '3' map to class indices 0, 1, 2;
                // documents with any other label are ignored.
                int cls = trainCategory[i] - '1';
                if (cls < 0 || cls >= classCount)
                {
                    continue;
                }

                string row = trainMatrix[i];
                for (int j = 0; j < vocabSize; j++)
                {
                    if (row[j] == '1')
                    {
                        num[cls][j] += 1.0;
                        denom[cls] += 1.0;
                    }
                }
            }

            // Store log probabilities so classification can add instead of
            // multiplying many small numbers.
            for (int k = 0; k < classCount; k++)
            {
                for (int j = 0; j < vocabSize; j++)
                {
                    num[k][j] = Math.Log(num[k][j] / denom[k]);
                }
            }

            // Publish the model only after training has fully succeeded.
            p0Num = num[0];
            p1Num = num[1];
            p2Num = num[2];

            label4.Text = "Processing sample data Complete";
        }

        /// <summary>
        /// Classifies the text in the text box and shows the three class
        /// scores and the winning category.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (textBox1.Text.Trim() == "")
            {
                return;
            }

            // Classifying before training would dereference null arrays.
            if (p0Num == null || p1Num == null || p2Num == null)
            {
                label1.Text = "Please process the sample data first";
                return;
            }

            // NOTE: the class priors are not added to the scores; the form's
            // own description says each type has 10 training samples, in
            // which case the priors are equal and do not change the ranking.
            string text = textBox1.Text;
            double p0 = 0;
            double p1 = 0;
            double p2 = 0;
            for (int j = 0; j < vocabArray.Length; j++)
            {
                if (text.Contains(vocabArray[j]))
                {
                    p0 += p0Num[j];
                    p1 += p1Num[j];
                    p2 += p2Num[j];
                }
            }

            string catelog;
            if (p0 > p1 && p0 > p2)
                catelog = "Sport";
            else if (p1 > p0 && p1 > p2)
                catelog = "Entertainment";
            else if (p2 > p0 && p2 > p1)
                catelog = "Military";
            else
                catelog = "cannot be judged";

            label3.Text = "Sport:" + p0.ToString() + "\r\n Entertainment:" + p1.ToString() + "\r\n Military:" + p2.ToString();
            label1.Text = "The owning type is:" + catelog;
        }
    }
}

