Tumors are either benign or malignant. By studying the recorded tumor attributes of 699 patients, we build a prediction model that determines the nature of a tumor from its attributes, so that for a patient who has not been seen before we can decide, from those attributes alone, whether the tumor is malignant.
Data used: Link: http://pan.baidu.com/s/1c26Dbjy Password: gllb
[HTML]View Plain Copy
- ###########################################
- # classifier: Benign or malignant tumors
- ###########################################
- ###########################################
- # Read the data set from a file and return it as a list of tuples
- ###########################################
def readset(FileName):
    """Read a comma-separated patient file and return a list of patient tuples.

    Every usable line holds eleven comma-separated fields: a patient id,
    nine integer attribute values and a diagnosis code.  Blank lines and
    lines containing a '?' (a missing attribute value) are skipped.

    Args:
        FileName: Path of the text file to read.

    Returns:
        A list of tuples ``(patient_id, label, a1, ..., a9)``.  ``label`` is
        'm' when the diagnosis code is '4' and 'B' otherwise; ``a1..a9`` are
        ints.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a line does not have exactly eleven fields or an
            attribute value is not an integer.
    """
    trainset = []
    # The context manager guarantees the file is closed even on error.
    with open(FileName) as trainfile:
        for line in trainfile:
            line = line.strip()  # drop the trailing '\n'
            if not line or '?' in line:
                # Skip empty lines and records with missing ('?') values.
                continue
            fields = [field.strip() for field in line.split(',')]
            if len(fields) != 11:
                raise ValueError(
                    "expected 11 comma-separated fields, got %d: %r"
                    % (len(fields), line)
                )
            patient_id = fields[0]
            diag = fields[-1]
            label = 'm' if diag == '4' else 'B'
            attributes = tuple(int(value) for value in fields[1:-1])
            trainset.append((patient_id, label) + attributes)
    return trainset
- ###########################################
- # Training Classifier
- ###########################################
def sumlists(LIST1, LIST2):
    """Return the element-wise sum of two equally long sequences.

    Args:
        LIST1: First sequence of numbers (list or tuple).
        LIST2: Second sequence of numbers (list or tuple).

    Returns:
        A new list whose i-th element is ``LIST1[i] + LIST2[i]``.  The
        result is as long as the shorter input.
    """
    # zip pairs the values positionally, so no hard-coded length is needed.
    return [left + right for left, right in zip(LIST1, LIST2)]
def makeaverages(listofsums, total):
    """Divide every value in ``listofsums`` by ``total``.

    Args:
        listofsums: Sequence of numeric sums.
        total: The count to divide by.

    Returns:
        A new list of floats, one per input value.

    Raises:
        ZeroDivisionError: If ``total`` is 0.
    """
    divisor = float(total)  # force true division regardless of input types
    return [value / divisor for value in listofsums]
def Classifier(trainset):
    """Train the classifier: compute one cut-off value per attribute.

    The attribute values of the benign ('B') patients and of all other
    patients are averaged separately; the cut-off for each attribute is the
    midpoint between those two averages.

    Args:
        trainset: Iterable of patient tuples ``(id, label, a1, ..., a9)``
            where ``label`` is 'B' for benign; any other label is counted
            as malignant.

    Returns:
        A list of nine floats, the per-attribute cut-off values.

    Raises:
        ValueError: If the training set lacks benign or malignant patients,
            since no average can be computed for an empty class.
    """
    benign_sums = [0] * 9
    benign_count = 0
    malignant_sums = [0] * 9
    malignant_count = 0

    for patient in trainset:
        attributes = patient[2:]  # skip the id and the label
        if patient[1] == 'B':
            benign_sums = sumlists(benign_sums, attributes)
            benign_count += 1
        else:
            malignant_sums = sumlists(malignant_sums, attributes)
            malignant_count += 1

    if benign_count == 0 or malignant_count == 0:
        raise ValueError(
            "training set must contain both benign and malignant patients "
            "(benign=%d, malignant=%d)" % (benign_count, malignant_count)
        )

    benign_avgs = makeaverages(benign_sums, benign_count)
    malignant_avgs = makeaverages(malignant_sums, malignant_count)
    # The midpoint of the two class averages separates the classes.
    return makeaverages(sumlists(benign_avgs, malignant_avgs), 2)
- ###########################################
- # test Classifier
- ###########################################
def Test(Testset, classifier):
    """Classify every patient by letting each attribute cast one vote.

    An attribute value strictly greater than its cut-off counts as a vote
    for malignant; otherwise it counts as a vote for benign.

    Args:
        Testset: Iterable of patient tuples ``(id, label, a1, ..., a9)``.
        classifier: Sequence of per-attribute cut-off values.

    Returns:
        A list of tuples ``(id, benign_votes, malignant_votes, label)``,
        one per patient, in input order.
    """
    results = []
    for patient in Testset:
        benign_count = 0
        malignant_count = 0
        # The attribute values start at index 2, after the id and the label.
        for value, cutoff in zip(patient[2:], classifier):
            if value > cutoff:
                malignant_count += 1
            else:
                benign_count += 1
        results.append((patient[0], benign_count, malignant_count, patient[1]))
    return results
- ###########################################
- # Format Output Test results
- ###########################################
def showresult(Result):
    """Print how many patients were tested and how many were misclassified.

    A patient is predicted benign when the benign votes exceed the malignant
    votes, and malignant otherwise.  A prediction is wrong when it is benign
    but the actual label is 'm', or malignant but the actual label is 'B'.

    Args:
        Result: Iterable of tuples
            ``(id, benign_votes, malignant_votes, actual_label)``.
    """
    total_count = 0
    wrong_count = 0
    for record in Result:
        total_count += 1
        if record[1] > record[2]:
            # Predicted benign.
            if record[3] == 'm':
                wrong_count += 1
        elif record[3] == 'B':
            # Predicted malignant, but the patient is actually benign.
            wrong_count += 1
    print("%d patients,there were%d wrong" % (total_count, wrong_count))
- ###########################################
- # main function
- ###########################################
def main():
    """Train on the training file, classify the test file, print a summary.

    Reads both data files with ``readset``, derives the cut-off values with
    ``Classifier``, classifies the test patients with ``Test`` and reports
    the error count with ``showresult``.  Progress messages are printed
    between the steps.
    """
    print("Reading in train data ...")
    trainfilename = "C:\\python36\\code\\ruxian\\fulltraindata.txt"
    trainset = readset(trainfilename)
    print("Read trainset done!")

    print("Begin Training ...")
    classifier = Classifier(trainset)
    print("Train Classifier done!")

    print("Reading in test data ...")
    testfilename = "C:\\python36\\code\\ruxian\\fulltestdata.txt"
    testset = readset(testfilename)
    print("Read testset done!")

    print("Begin testing ...")
    result = Test(testset, classifier)
    print("Test done!")

    showresult(result)
    print("program finished.\n")
Reference: "Python Introductory Classics study book"
Data mining -- a classic introductory Python case study on the classification of breast cancer