Notes:
1. Remove the comments from the code before running it; leaving them in the source program will cause an error.
2. The data set used in the code comes from http://archive.ics.uci.edu/ml/datasets/Car+Evaluation
3. For the principle behind the naive Bayesian classifier, see my previous blog post.
# author: wenxiang cui
# date: 2015/9/11
# function: a classifier which uses the naive Bayesian algorithm
import math


class bayesian(object):
    """Building blocks of a naive Bayesian classifier.

    Holds the training rows and computes class priors and per-attribute
    class-conditional statistics from them.
    """

    def __init__(self):
        self.datas = []       # training sample set (list of rows, each a list of str)
        self.attrilist = []   # attribute names
        self.desclass = 0     # column index of the classification target in each row
        self.priorp = {}      # class label -> prior probability (set by calpriorprob)
        self.classlabel = []  # class labels in order of first appearance

    def loaddatas(self, filename, decollator):
        """Read the training data from disk into ``self.datas``.

        Args:
            filename: path of the data source file.
            decollator: delimiter between the fields of a line, e.g. ',' or ' '.

        Each non-blank line becomes one row (a list of field strings).
        Blank lines are skipped so they do not turn into bogus one-field rows.
        """
        rows = []
        # 'with' guarantees the file is closed even if reading fails.
        with open(filename, 'r') as fp:
            for line in fp:
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue
                rows.append(line.split(decollator))
        self.datas = rows

    def getattrilist(self, attributes):
        """Store a copy of the attribute names of the training data set.

        Args:
            attributes: attribute names; they should correspond to the
                columns of the data source.
        """
        self.attrilist = attributes[:]

    def getdesclass(self, loca):
        """Store the column index of the classification target attribute.

        Args:
            loca: position of the target attribute within each data row.
        """
        self.desclass = loca

    def calpriorprob(self):
        """Compute the prior probability of every class.

        Sets ``self.classlabel`` (labels in order of first appearance) and
        ``self.priorp`` (label -> relative frequency in ``self.datas``).
        With no training rows both end up empty.
        """
        dictfreq = {}   # class label -> number of samples
        deslabel = []
        for items in self.datas:
            label = items[self.desclass]
            if label not in dictfreq:
                dictfreq[label] = 0
                deslabel.append(label)
            dictfreq[label] += 1
        samplenum = len(self.datas)
        self.priorp = {
            label: float(dictfreq[label]) / samplenum for label in deslabel
        }
        self.classlabel = deslabel

    def calprob(self, type, loca):
        """Compute the class-conditional statistics of one attribute.

        Args:
            type: 'continuous' or 'discrete'. (The name shadows the builtin
                but is kept so existing callers keep working.)
            loca: column index of the attribute in each data row.

        Returns:
            For 'continuous': a list with one ``[mean, deviation]`` pair per
            class, ordered like ``self.classlabel``.
            For 'discrete': a dict ``{class label: {value: probability}}``.
            For any other ``type``: prints 'wrong type!' and returns None.
        """
        if not self.classlabel:
            # The class labels come from calpriorprob(); compute them on
            # demand so calprob() also works when called first.
            self.calpriorprob()
        if type == 'continuous':
            return self._continuous_params(loca)
        if type == 'discrete':
            return self._discrete_probs(loca)
        print('wrong type!')
        return None

    def _continuous_params(self, loca):
        """Return ``[mean, deviation]`` of column ``loca`` for every class."""
        index_of = {label: i for i, label in enumerate(self.classlabel)}
        dictdata = [[] for _ in self.classlabel]
        for items in self.datas:
            dictdata[index_of[items[self.desclass]]].append(float(items[loca]))
        return [list(self.calparam(values)) for values in dictdata]

    def _discrete_probs(self, loca):
        """Return ``{class: {value: probability}}`` for column ``loca``."""
        dictfreq = {label: {} for label in self.classlabel}
        values = set()  # every distinct value of the attribute, over all classes
        for items in self.datas:
            value = items[loca]
            values.add(value)
            counts = dictfreq[items[self.desclass]]
            # Count per class; a value may first show up under another class.
            counts[value] = counts.get(value, 0) + 1

        dictprob = {}
        for label in self.classlabel:
            counts = dictfreq[label]
            missing = [value for value in values if value not in counts]
            if missing:
                # Laplace smoothing avoids a zero class-conditional probability
                # for values never observed together with this class.
                for value in missing:
                    counts[value] = 0
                counts = self.laplaceestimator(counts)
            # Normalise by the (possibly smoothed) total so the
            # probabilities of one class sum to 1.
            total = float(sum(counts.values()))
            dictprob[label] = {value: counts[value] / total for value in counts}
        return dictprob

    def calparam(self, soulist):
        """Return the mean and the sample standard deviation of a list.

        Args:
            soulist: non-empty list of numbers.

        Returns:
            Tuple ``(meanval, deviation)``. The deviation uses the n-1
            denominator; for a single element it is 0.0.

        Raises:
            ValueError: if ``soulist`` is empty.
        """
        count = len(soulist)
        if count == 0:
            raise ValueError('cannot compute the mean of an empty list')
        meanval = sum(soulist) / float(count)
        if count == 1:
            # n - 1 would be zero; a single sample has no spread.
            return meanval, 0.0
        tempt = sum((val - meanval) ** 2 for val in soulist)
        deviation = math.sqrt(tempt / float(count - 1))
        return meanval, deviation

    def laplaceestimator(self, soudict):
        """Return a copy of ``soudict`` with every count increased by one.

        Args:
            soudict: dict mapping a value to its count.

        Returns:
            A new, smoothed dict; ``soudict`` itself is not modified.
        """
        return {key: count + 1 for key, count in soudict.items()}


class carbayesian(bayesian):
    """Naive Bayesian classifier for the six-attribute car evaluation data."""

    def __init__(self):
        bayesian.__init__(self)
        self.buying = {}
        self.maint = {}
        self.doors = {}
        self.persons = {}
        self.lug_boot = {}
        self.safety = {}
        self.Prob = []  # per-attribute probability tables, in column order

    def tranning(self):
        """Train on ``self.datas``: compute priors and all six attribute tables.

        Columns 0-5 are treated as discrete attributes.
        """
        # classify() needs the priors, so always refresh them with the tables.
        self.calpriorprob()
        self.buying = self.calprob('discrete', 0)
        self.maint = self.calprob('discrete', 1)
        self.doors = self.calprob('discrete', 2)
        self.persons = self.calprob('discrete', 3)
        self.lug_boot = self.calprob('discrete', 4)
        self.safety = self.calprob('discrete', 5)
        self.Prob = [
            self.buying,
            self.maint,
            self.doors,
            self.persons,
            self.lug_boot,
            self.safety,
        ]

    def classify(self, sample):
        """Determine and print the class of a sample.

        Args:
            sample: attribute values in column order. Values beyond the
                number of trained attribute tables are ignored.

        Returns:
            The class label with the highest posterior score. On a tie the
            label that appeared first in the training data wins.

        Raises:
            ValueError: if the classifier has not been trained.
            KeyError: if an attribute value never occurred in the training data.
        """
        if not self.classlabel or not self.Prob:
            raise ValueError('classifier has not been trained; call tranning() first')
        posteriorprob = {}
        for label in self.classlabel:
            score = self.priorp[label]
            # zip pairs each attribute table with the matching sample value.
            for table, value in zip(self.Prob, sample):
                score *= table[label][value]
            posteriorprob[label] = score
        best = max(self.classlabel, key=lambda label: posteriorprob[label])
        print('The sample belongs to the category is: %s' % best)
        return best


def _main():
    """Train on the car evaluation data file and classify one sample."""
    # Raw string: the Windows path contains backslashes.
    filename = r"D:\MyDocuments-HnH\DataMining\DataSets\Car\Car_Data.txt"
    mycar = carbayesian()
    mycar.loaddatas(filename, ',')
    attributes = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
    mycar.getattrilist(attributes)
    mycar.getdesclass(7 - 1)
    mycar.tranning()
    sample = ['vhigh', 'vhigh', '2', '2', 'small', 'low']
    # NOTE(review): the original script built the sample but never used it;
    # classifying it is the presumed intent - confirm.
    mycar.classify(sample)


if __name__ == '__main__':
    _main()
This article is from the "Lu Yao" blog; please be sure to keep this source attribution: http://cwxfly.blog.51cto.com/6113982/1694356
Python implementation of naive Bayesian algorithm