Python-implemented Naive Bayes classifier example
This article describes a Naive Bayes classifier implemented in Python, shared here for your reference. The details are as follows:
I needed a Naive Bayes classifier for work, so I wrote one.
For attribute values that never occur together with a given tag in the training set, Laplace smoothing is applied, so that a single unseen value does not drive the probability of the whole conditional product to zero.
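As a quick illustration of the idea (not part of the classifier's code; laplace_prob, counts, and all_values are hypothetical names), standard add-one smoothing adds 1 to every count before normalizing, so a value never observed with a tag still gets a small non-zero probability:

def laplace_prob(value, counts, all_values):
    # add 1 to every count so unseen values keep a small, non-zero probability
    total = sum(counts.get(v, 0) + 1 for v in all_values)
    return (counts.get(value, 0) + 1) / total

# occupations observed with the tag "cold" in the toy data further below
counts = {"nurse": 1, "construction worker": 1, "instructor": 1}
all_values = ["nurse", "farmer", "construction worker", "instructor"]
print(laplace_prob("farmer", counts, all_values))  # 1/7: small, but not zero

The classifier below uses a slightly simpler variant of this: counts that are still missing after training are filled with 1 (the fillNa value) before normalizing, which has the same effect of avoiding zero probabilities.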
The basic principles of Naive Bayes are easy to find online, so I will not repeat them here. In short, the classifier scores each tag as P(tag) multiplied by the product of P(feature value | tag) over the input features, and returns the highest-scoring tag. The code follows.
class NBClassify(object):
    def __init__(self, fillNa=1):
        # value used to fill counts of feature values never seen with a tag
        self.fillNa = fillNa

    def train(self, trainSet):
        # Count how often each tag occurs in the training set
        dictTag = {}
        for subTuple in trainSet:
            dictTag[str(subTuple[1])] = 1 if str(subTuple[1]) not in dictTag.keys() else dictTag[str(subTuple[1])] + 1
        # Convert tag frequencies to prior probabilities P(tag)
        tagProbablity = {}
        totalFreq = sum([value for value in dictTag.values()])
        for key, value in dictTag.items():
            tagProbablity[key] = value / totalFreq
        # print(tagProbablity)
        self.tagProbablity = tagProbablity

        # Collect basic information about every feature:
        # {feature1: {value1: 5, value2: 1}, feature2: {value1: 1, value2: 5}}
        dictFeaturesBase = {}
        for subTuple in trainSet:
            for key, value in subTuple[0].items():
                if key not in dictFeaturesBase.keys():
                    dictFeaturesBase[key] = {value: 1}
                elif value not in dictFeaturesBase[key].keys():
                    dictFeaturesBase[key][value] = 1
                else:
                    dictFeaturesBase[key][value] += 1
        # dictFeaturesBase = {
        #     'Occupation': {'farmer': 1, 'instructor': 2, 'construction worker': 2, 'nurse': 1},
        #     'Symptom': {'sneezing': 3, 'headache': 3}
        # }

        # Build a nested dict: tag -> feature -> feature value -> count (None at first)
        dictFeatures = {}.fromkeys([key for key in dictTag])
        for key in dictFeatures.keys():
            dictFeatures[key] = {}.fromkeys([key for key in dictFeaturesBase])
        for key, value in dictFeatures.items():
            for subkey in value.keys():
                value[subkey] = {}.fromkeys([x for x in dictFeaturesBase[subkey].keys()])
        # dictFeatures = {
        #     'cold': {'Symptom': {'sneezing': None, 'headache': None},
        #              'Occupation': {'nurse': None, 'farmer': None, 'construction worker': None, 'instructor': None}},
        #     'concussion': {...},
        #     'allergy': {...}
        # }

        # Fill dictFeatures with counts from the training samples
        for subTuple in trainSet:
            for key, value in subTuple[0].items():
                dictFeatures[subTuple[1]][key][value] = 1 if dictFeatures[subTuple[1]][key][value] is None else dictFeatures[subTuple[1]][key][value] + 1
        # print(dictFeatures)

        # Laplace smoothing: counts still at None were never seen with this tag;
        # fill them with a small value so the probability is tiny rather than zero
        for tag, featuresDict in dictFeatures.items():
            for featureName, fetureValueDict in featuresDict.items():
                for featureKey, featureValues in fetureValueDict.items():
                    if featureValues is None:
                        fetureValueDict[featureKey] = self.fillNa

        # Turn the counts into conditional probabilities P(feature value | tag)
        for tag, featuresDict in dictFeatures.items():
            for featureName, fetureValueDict in featuresDict.items():
                totalCount = sum([x for x in fetureValueDict.values() if x is not None])
                for featureKey, featureValues in fetureValueDict.items():
                    fetureValueDict[featureKey] = featureValues / totalCount if featureValues is not None else None
        self.featuresProbablity = dictFeatures

    def classify(self, featureDict):
        resultDict = {}
        # Score each tag: P(tag) * product of P(feature value | tag)
        for key, value in self.tagProbablity.items():
            iNumList = []
            for f, v in featureDict.items():
                if self.featuresProbablity[key][f][v]:
                    iNumList.append(self.featuresProbablity[key][f][v])
            conditionPr = 1
            for iNum in iNumList:
                conditionPr *= iNum
            resultDict[key] = value * conditionPr
        # Compare the scores of the tags and return the most likely one
        resultList = sorted(resultDict.items(), key=lambda x: x[1], reverse=True)
        return resultList[0][0]


if __name__ == '__main__':
    # trainSet is a list of (featureDict, tag) tuples
    trainSet = [
        ({"Symptom": "sneezing", "Occupation": "nurse"}, "cold"),
        ({"Symptom": "sneezing", "Occupation": "farmer"}, "allergy"),
        ({"Symptom": "headache", "Occupation": "construction worker"}, "concussion"),
        ({"Symptom": "headache", "Occupation": "construction worker"}, "cold"),
        ({"Symptom": "sneezing", "Occupation": "instructor"}, "cold"),
        ({"Symptom": "headache", "Occupation": "instructor"}, "concussion"),
    ]
    monitor = NBClassify()
    monitor.train(trainSet)
    # Query: how likely is a sneezing construction worker to have a cold?
    result = monitor.classify({"Symptom": "sneezing", "Occupation": "construction worker"})
    print(result)
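Running the script prints cold: the prior P(cold) = 0.5 and the smoothed conditionals P(sneezing | cold) = 2/3 and P(construction worker | cold) = 1/4 give cold the highest score (about 0.083) among the three tags. As a sanity check (an addition, not part of the original article, and assuming scikit-learn is installed), a minimal sketch with scikit-learn's CategoricalNB fits the same toy data; its alpha=1 setting is standard add-one (Laplace) smoothing:

from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import OrdinalEncoder

# same toy data as above, with features encoded as integer category codes
X = [["sneezing", "nurse"], ["sneezing", "farmer"],
     ["headache", "construction worker"], ["headache", "construction worker"],
     ["sneezing", "instructor"], ["headache", "instructor"]]
y = ["cold", "allergy", "concussion", "cold", "cold", "concussion"]

enc = OrdinalEncoder()
clf = CategoricalNB(alpha=1)  # alpha=1 is add-one (Laplace) smoothing
clf.fit(enc.fit_transform(X), y)
print(clf.predict(enc.transform([["sneezing", "construction worker"]])))  # ['cold']

On this toy data it agrees with the hand-rolled classifier and also predicts cold, even though the smoothing details differ slightly.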
For more information about Naive Bayes algorithms, see http://www.bkjia.com/article/129903.htm.