# -*- coding: utf-8 -*-
"""Python implementation of a simple Bayesian classifier.

>>> c = Classy()
>>> c.train(['cpu', 'ram', 'alu', 'io', 'bridge', 'disk'], 'architecture')
True
>>> c.train(['monitor', 'mouse', 'keyboard', 'microphone', 'headphones'], 'input_devices')
True
>>> c.train(['desk', 'chair', 'cabinet', 'lamp'], 'office furniture')
True
>>> my_office = ['cpu', 'monitor', 'mouse', 'chair']
>>> c.classify(my_office)[0]
'input_devices'
"""

from collections import Counter
import math


class ClassifierNotTrainedException(Exception):
    """Raised when the classifier is used before any document was trained."""

    def __str__(self):
        return "Classifier is not trained."


class Classy(object):
    """Naive Bayes text classifier working on lists of terms.

    State kept on the instance:

    * ``term_count_store`` -- ``{class_id: {term: count}}``
    * ``data['class_term_count']`` -- ``{class_id: number of terms seen}``
    * ``data['class_doc_count']`` -- ``{class_id: number of documents seen}``
    * ``data['beta_priors']`` -- ``{class_id: log(class docs / all docs)}``
    * ``total_term_count`` / ``total_doc_count`` -- totals over all classes
    """

    def __init__(self):
        self.term_count_store = {}
        self.data = {
            'class_term_count': {},
            'beta_priors': {},
            'class_doc_count': {},
        }
        self.total_term_count = 0
        self.total_doc_count = 0

    def train(self, document_source, class_id):
        """Train the classifier with one document.

        ``document_source`` is an iterable of hashable terms and ``class_id``
        is the label the document belongs to.  Term and document counters are
        updated and the class priors are recomputed.  Always returns ``True``.
        """
        counts = Counter(document_source)
        # Taking the size from the Counter (instead of len()) also supports
        # iterators/generators, which have no length.
        term_total = sum(counts.values())

        class_terms = self.term_count_store.setdefault(class_id, {})
        for term, freq in counts.items():
            class_terms[term] = class_terms.get(term, 0) + freq

        class_term_count = self.data['class_term_count']
        class_term_count[class_id] = class_term_count.get(class_id, 0) + term_total

        class_doc_count = self.data['class_doc_count']
        class_doc_count[class_id] = class_doc_count.get(class_id, 0) + 1

        self.total_term_count += term_total
        self.total_doc_count += 1
        self.compute_beta_priors()
        return True

    def classify(self, document_input):
        """Return ``(class_id, score)`` for the best matching class.

        ``score`` is the class log-prior plus the sum, over the distinct terms
        of ``document_input``, of ``frequency * log(smoothed probability)``.
        On equal scores the class trained first wins.

        Raises ``ClassifierNotTrainedException`` if nothing was trained yet.
        """
        if not self.total_doc_count:
            raise ClassifierNotTrainedException()

        term_freqs = Counter(document_input)
        best = None
        for class_id in self.data['class_doc_count']:
            class_terms = self.term_count_store.get(class_id, {})
            # NOTE(review): textbook add-one smoothing adds the vocabulary
            # size here; the original adds the number of trained documents.
            # Kept as-is so scores stay comparable -- confirm the intent.
            denominator = float(
                self.data['class_term_count'][class_id] + self.total_doc_count
            )
            score = self.data['beta_priors'][class_id]
            # Iterate distinct terms only: the frequency factor already
            # accounts for repeats, so walking the raw document would count
            # a repeated term quadratically.
            for term, freq in term_freqs.items():
                # A term never seen in this class gets count 0 and is still
                # smoothed by the +1.  Aborting the sum here (as the code
                # used to) let classes with unknown terms keep a higher
                # score than classes that actually matched.
                probability = (class_terms.get(term, 0) + 1) / denominator
                score += freq * math.log(probability)
            # Strict '>' keeps the first class seen on ties.
            if best is None or score > best[1]:
                best = (class_id, score)
        return best

    def compute_beta_priors(self):
        """Recompute ``data['beta_priors']`` as ``log(class docs / all docs)``.

        Raises ``ClassifierNotTrainedException`` if nothing was trained yet.
        """
        if not self.total_doc_count:
            raise ClassifierNotTrainedException()

        # float() forces true division on Python 2 as well; integer division
        # would truncate the ratio to 0 and make math.log() fail.
        total_docs = float(self.total_doc_count)
        for class_id, doc_count in self.data['class_doc_count'].items():
            self.data['beta_priors'][class_id] = math.log(doc_count / total_docs)