Python implementation of naive Bayesian algorithm

Source: Internet
Author: User

Notes: 1. Do not leave the comments from the listing in the source file when you run it; they will cause an error.

2. The data set used in the code comes from http://archive.ics.uci.edu/ml/datasets/Car+Evaluation

3. For the principle behind the naive Bayesian classifier, see my previous blog post.

# author  : wenxiang cui
# date    : 2015/9/11
# function: a classifier using the naive Bayesian algorithm
import math


class bayesian:
    """Building blocks of a naive Bayes classifier over rows of string fields.

    Attributes:
        datas: training samples, one list of fields per row.
        attrilist: names of the attribute columns.
        desclass: column index of the classification target within a row.
        priorp: class label -> prior probability (filled by ``calpriorprob``).
        classlabel: class labels in order of first appearance
            (filled by ``calpriorprob``).
    """

    def __init__(self):
        self.datas = []
        self.attrilist = []
        self.desclass = 0
        self.priorp = {}
        self.classlabel = []

    def loaddatas(self, filename, decollator):
        """Read the training data from disk into ``self.datas``.

        Args:
            filename: path of the data source file.
            decollator: delimiter between the fields of a line
                (for example ' ' or ',').
        """
        rows = []
        # ``with`` closes the file even when reading fails part-way through.
        with open(filename, 'r') as fp:
            for line in fp:
                line = line.rstrip('\r\n')
                if not line.strip():
                    # A blank line has no class column and would break the
                    # frequency counting done later.
                    continue
                rows.append(line.split(decollator))
        self.datas = rows

    def getattrilist(self, attributes):
        """Store a copy of the attribute names of the training data.

        Args:
            attributes: attribute names, in the same order as the columns of
                the data source.
        """
        self.attrilist = list(attributes)

    def getdesclass(self, loca):
        """Store the column index of the classification target attribute.

        Args:
            loca: position of the target attribute within a data row.
        """
        self.desclass = loca

    def calpriorprob(self):
        """Compute the prior probability of every class.

        Fills ``self.classlabel`` (labels in order of first appearance) and
        ``self.priorp`` (label -> relative frequency in ``self.datas``).
        """
        freq = {}
        labels = []
        for row in self.datas:
            label = row[self.desclass]
            if label not in freq:
                freq[label] = 0
                labels.append(label)
            freq[label] += 1
        samplenum = len(self.datas)
        self.priorp = {label: float(freq[label]) / samplenum for label in labels}
        self.classlabel = labels

    def calprob(self, type, loca):
        """Compute the class-conditional distribution of one attribute.

        Args:
            type: 'continuous' or 'discrete'. The name shadows the builtin
                but is kept so existing callers keep working.
            loca: column index of the attribute within a data row.

        Returns:
            For 'continuous': a list with one ``[mean, deviation]`` pair per
            class, ordered like ``self.classlabel``.
            For 'discrete': ``{class label: {attribute value: probability}}``.
            For any other ``type``: ``None`` after printing 'wrong type!'.
        """
        if not self.classlabel:
            # The class labels are needed below; derive them on demand.
            self.calpriorprob()
        if type == 'continuous':
            return self._continuous_params(loca)
        if type == 'discrete':
            return self._discrete_probs(loca)
        print('wrong type!')
        return None

    def _continuous_params(self, loca):
        """Return ``[mean, deviation]`` of column ``loca`` for every class."""
        values = [[] for _ in self.classlabel]
        for row in self.datas:
            index = self.classlabel.index(row[self.desclass])
            values[index].append(float(row[loca]))
        return [list(self.calparam(samples)) for samples in values]

    def _discrete_probs(self, loca):
        """Return the per-class value probabilities of column ``loca``."""
        freq = {label: {} for label in self.classlabel}
        seen = set()
        for row in self.datas:
            value = row[loca]
            seen.add(value)
            counts = freq[row[self.desclass]]
            # ``get`` covers a value already seen for another class but not
            # yet for this one.
            counts[value] = counts.get(value, 0) + 1

        probs = {}
        for label in self.classlabel:
            counts = freq[label]
            missing = [value for value in seen if value not in counts]
            if missing:
                # Laplace smoothing keeps a class-conditional probability
                # from being 0 for values this class never showed.
                for value in missing:
                    counts[value] = 0
                counts = self.laplaceestimator(counts)
            # Normalising by the (possibly smoothed) class total makes the
            # probabilities of one class sum to 1.
            total = float(sum(counts.values()))
            probs[label] = {value: count / total for value, count in counts.items()}
        return probs

    def calparam(self, soulist):
        """Return the mean and sample standard deviation of a list.

        Args:
            soulist: non-empty list of numbers.

        Returns:
            Tuple ``(mean, deviation)``; the deviation uses the ``n - 1``
            denominator and is ``0.0`` for a single value.

        Raises:
            ValueError: if ``soulist`` is empty.
        """
        count = len(soulist)
        if count == 0:
            raise ValueError('calparam() needs at least one value')
        meanval = sum(soulist) / float(count)
        if count == 1:
            # n - 1 would be 0; a single observation has no spread.
            return meanval, 0.0
        squared = sum((val - meanval) ** 2 for val in soulist)
        deviation = math.sqrt(float(squared) / (count - 1))
        return meanval, deviation

    def laplaceestimator(self, soudict):
        """Return a copy of ``soudict`` with every count increased by one.

        Args:
            soudict: value -> count dictionary to smooth.
        """
        return {key: count + 1 for key, count in soudict.items()}


class carbayesian(bayesian):
    """Naive Bayes classifier for the six discrete car attributes."""

    def __init__(self):
        bayesian.__init__(self)
        self.buying = {}
        self.maint = {}
        self.doors = {}
        self.persons = {}
        self.lug_boot = {}
        self.safety = {}
        self.prob = []

    def tranning(self):
        """Train on ``self.datas``: priors plus one table per attribute."""
        # The priors also provide the class labels that calprob relies on.
        self.calpriorprob()
        self.buying = self.calprob('discrete', 0)
        self.maint = self.calprob('discrete', 1)
        self.doors = self.calprob('discrete', 2)
        self.persons = self.calprob('discrete', 3)
        self.lug_boot = self.calprob('discrete', 4)
        self.safety = self.calprob('discrete', 5)
        self.prob = [
            self.buying,
            self.maint,
            self.doors,
            self.persons,
            self.lug_boot,
            self.safety,
        ]

    def classify(self, sample):
        """Print and return the most probable class of ``sample``.

        Args:
            sample: attribute values in training column order. Values beyond
                the trained attributes (such as a trailing class label) are
                ignored.

        Returns:
            The class label with the largest posterior score; on a tie the
            label that appeared first in the training data wins.

        Raises:
            ValueError: if the classifier has not been trained.
            KeyError: if ``sample`` holds a value never seen in training.
        """
        if not self.classlabel or not self.prob:
            raise ValueError('classifier is not trained; call tranning() first')
        posteriorprob = {}
        for label in self.classlabel:
            score = self.priorp[label]
            # zip pairs every attribute table with its value, so no
            # attribute of the sample is skipped.
            for table, value in zip(self.prob, sample):
                score *= table[label][value]
            posteriorprob[label] = score
        best = max(self.classlabel, key=posteriorprob.get)
        print('The sample belongs to the category is: {0}'.format(best))
        return best


if __name__ == '__main__':
    filename = r"D:\MyDocuments-HnH\DataMining\DataSets\Car\Car_Data.txt"
    mycar = carbayesian()
    mycar.loaddatas(filename, ',')
    attributes = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
    mycar.getattrilist(attributes)
    mycar.getdesclass(7 - 1)
    mycar.tranning()
    # Values are spelled as in the UCI Car Evaluation file (lower case).
    sample = ['vhigh', 'vhigh', '2', '2', 'small', 'low']
    # The listing defined the sample but never classified it.
    mycar.classify(sample)


This article is from the "Lu Yao" blog; please be sure to retain this source link: http://cwxfly.blog.51cto.com/6113982/1694356

Python implementation of naive Bayesian algorithm

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email; we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.