# -*- coding: utf-8 -*-
"""
Created on Mon 23:40:13 2017

@author: mdz

Naive Bayes classifier (NBC) over set-of-words (0/1) document vectors,
with a small built-in corpus and a demo driver.
"""
import numpy as np


def loaddata():
    """Return the built-in toy corpus and its class labels.

    Returns:
        A ``(vocablist, classlist)`` tuple. ``vocablist`` is a list of
        documents, each already split into word tokens. ``classlist`` holds
        one label per document: 1 for insulting text, 0 for normal speech.
    """
    # NOTE: tokens are matched case-sensitively ('Him' and 'him' are
    # different vocabulary entries); capitalisation is kept as found.
    vocablist = [
        ['my', 'dog', 'have', 'flea', 'problems', 'help', 'I'],
        ['Maybe', 'not', 'take', 'him', 'to', 'dog', 'Park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
        ['Stop', 'Posting', 'stupid', 'worthless', 'garbage'],
        ['Mr', 'licks', 'ate', 'my', 'steak', 'How', 'to', 'Stop', 'Him'],
        ['Quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    classlist = [0, 1, 0, 1, 0, 1]  # 1: insulting text, 0: normal speech
    return vocablist, classlist


def filtervocablist(vocablist):
    """Collapse tokenised documents into a list of unique words.

    The length of the returned list is the number of features used by the
    classifier.

    Args:
        vocablist: iterable of documents, each an iterable of word tokens.

    Returns:
        A sorted list containing every distinct word exactly once. Sorting
        makes the feature order reproducible from run to run, which a bare
        ``list(set(...))`` does not guarantee.
    """
    vocabset = set()
    for document in vocablist:
        vocabset.update(document)
    return sorted(vocabset)


def zero_one(vocablist, input):
    """Encode a tokenised sample as a 0/1 vector over the vocabulary.

    Args:
        vocablist: list of vocabulary words; position defines the feature
            index.
        input: iterable of word tokens to encode. (The name shadows the
            builtin but is kept so keyword callers keep working.)

    Returns:
        A list of ``len(vocablist)`` ints where entry ``i`` is 1 if
        ``vocablist[i]`` occurs in ``input`` and 0 otherwise. Words that are
        not in the vocabulary are reported on stdout and otherwise ignored.
    """
    # Build the word -> index map once instead of scanning the vocabulary
    # twice per word (``in`` followed by ``list.index``). ``setdefault``
    # keeps the first position of a repeated word, matching ``list.index``.
    index_of = {}
    for position, word in enumerate(vocablist):
        index_of.setdefault(word, position)

    returnvec = [0] * len(vocablist)
    for word in input:
        position = index_of.get(word)
        if position is None:
            print("The word:%s is not in my vocabulary!" % word)
        else:
            returnvec[position] = 1
    return returnvec


def TRAINNBC(trainsamples, traincategory):
    """Estimate naive Bayes parameters from 0/1 training vectors.

    Per-word counts start at one and each class denominator starts at the
    vocabulary size, so a word never seen in a class still gets a non-zero
    probability.

    Args:
        trainsamples: 2-D array-like, one row per training document and one
            column per vocabulary word.
        traincategory: 1-D array-like of labels, one per row. Rows labelled 1
            form class 1; every other row is treated as class 0.

    Returns:
        ``(p1vec, p0vec, pabusive)``: the per-word log-probabilities for
        class 1, the per-word log-probabilities for class 0, and the fraction
        of training rows labelled 1.

    Raises:
        ValueError: if ``trainsamples`` is not a non-empty 2-D array or the
            number of labels does not match the number of rows.
    """
    samples = np.asarray(trainsamples, dtype=float)
    labels = np.asarray(traincategory)
    if samples.ndim != 2 or samples.shape[0] == 0:
        raise ValueError("trainsamples must be a non-empty 2-D array")
    numtrainsamp, numwords = samples.shape
    if labels.ndim != 1 or len(labels) != numtrainsamp:
        raise ValueError("traincategory must hold one label per training row")

    is_class1 = labels == 1
    pabusive = is_class1.sum() / float(numtrainsamp)

    # Class-1 rows feed the p1 counters and all other rows feed the p0
    # counters. This used to be inverted, with the caller swapping the two
    # vectors back when classifying.
    class1_rows = samples[is_class1]
    class0_rows = samples[~is_class1]

    p1num = np.ones(numwords) + class1_rows.sum(axis=0)
    p0num = np.ones(numwords) + class0_rows.sum(axis=0)
    p1numtotal = numwords + class1_rows.sum()
    p0numtotal = numwords + class0_rows.sum()

    # Work in log space so that products of many small probabilities do not
    # underflow when they are combined at classification time.
    p1vec = np.log(p1num / p1numtotal)
    p0vec = np.log(p0num / p0numtotal)
    return p1vec, p0vec, pabusive


def CLASSIFYOFNBC(testsamples, p1vec, p0vec, pabusive):
    """Classify one 0/1 sample vector with trained naive Bayes parameters.

    Args:
        testsamples: 0/1 feature vector for the sample.
        p1vec: per-word log-probabilities for class 1.
        p0vec: per-word log-probabilities for class 0.
        pabusive: prior probability of class 1.

    Returns:
        1 if the class-1 log-score is strictly greater than the class-0
        log-score, otherwise 0 (ties go to class 0).
    """
    p1 = np.sum(testsamples * p1vec) + np.log(pabusive)
    p0 = np.sum(testsamples * p0vec) + np.log(1 - pabusive)
    return 1 if p1 > p0 else 0


def TESTINGNBC():
    """Train on the built-in corpus and print the class of two samples."""
    vocablist, classlist = loaddata()
    vocabset = filtervocablist(vocablist)
    trainlist = [zero_one(vocabset, document) for document in vocablist]
    p1vec, p0vec, pabusive = TRAINNBC(np.array(trainlist), np.array(classlist))

    for testentry in (['love', 'my', 'daughter'], ['stupid', 'garbage']):
        testsamples = np.array(zero_one(vocabset, testentry))
        # Vectors are passed in the order the signature declares them.
        label = CLASSIFYOFNBC(testsamples, p1vec, p0vec, pabusive)
        # Two spaces after the colon reproduce the output of the original
        # comma-separated print statement.
        print("%s classified as:  %s" % (testentry, label))
# NBC: naive Bayes classifier -- Python code from "Machine Learning in Action"