# coding=utf-8
"""User-based collaborative filtering over nested rating dictionaries.

Ratings are held as ``prefer[user][item] -> rating``. The module offers a
Pearson similarity, a top-K neighbour search, a mean-centred weighted
rating prediction, and a batch driver that writes predictions to a file.
"""
# NOTE(review): this file was reconstructed from a copy whose whitespace and
# identifier casing had been mangled. The camelCase names, the
# ``loadMovieLens`` module name and the lower-case default file names are the
# most plausible spellings -- confirm them against callers and the loader.

from math import sqrt


def sim_pearson(prefer, person1, person2):
    """Return the Pearson correlation of two users over their shared items.

    Args:
        prefer: Mapping ``user -> {item: rating}``.
        person1: Key of the first user in *prefer*.
        person2: Key of the second user in *prefer*.

    Returns:
        ``-1`` when the users have no item in common, ``0`` when the
        correlation is undefined (zero variance on the shared items),
        otherwise the correlation coefficient.

    Raises:
        KeyError: If *person1* is not a key of *prefer* (or *person2*,
            as soon as *person1* has at least one rating).
    """
    shared = [item for item in prefer[person1] if item in prefer[person2]]
    n = len(shared)
    if n == 0:
        return -1

    ratings1 = [prefer[person1][item] for item in shared]
    ratings2 = [prefer[person2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1_sq = sum(pow(r, 2) for r in ratings1)
    sum2_sq = sum(pow(r, 2) for r in ratings2)
    sum_products = sum(r1 * r2 for r1, r2 in zip(ratings1, ratings2))

    numerator = sum_products - (sum1 * sum2 / n)
    spread1 = sum1_sq - pow(sum1, 2) / n
    spread2 = sum2_sq - pow(sum2, 2) / n
    # Mathematically both spreads are >= 0, but float rounding can push a
    # zero spread slightly negative, which would make sqrt() raise.
    if spread1 <= 0 or spread2 <= 0:
        return 0
    denominator = sqrt(spread1 * spread2)
    if denominator == 0:
        # The product of two tiny positive spreads can underflow to zero.
        return 0
    return numerator / denominator


def topKMatches(prefer, person, itemId, k=20, sim=sim_pearson):
    """Return up to *k* users most similar to *person* who rated *itemId*.

    Args:
        prefer: Mapping ``user -> {item: rating}``.
        person: The user to find neighbours for; never part of the result.
        itemId: Only users whose ratings contain this item are candidates.
        k: Maximum number of neighbours to return.
        sim: Callable ``sim(prefer, person, other)`` returning a score.

    Returns:
        A list of user keys ordered by decreasing similarity (ties are
        ordered by decreasing user key, as tuples are compared).
    """
    scores = [
        (sim(prefer, person, other), other)
        for other in prefer
        if itemId in prefer[other] and other != person
    ]
    scores.sort(reverse=True)
    # Slicing already copes with fewer than k candidates.
    return [user for _, user in scores[:k]]


def getAverage(prefer, userId):
    """Return the arithmetic mean of all ratings given by *userId*.

    Raises:
        KeyError: If *userId* is not a key of *prefer*.
        ZeroDivisionError: If the user has no ratings.
    """
    ratings = prefer[userId]
    return sum(ratings.values()) / len(ratings)


def getRating(prefer1, userId, itemId, knumber=20, similarity=sim_pearson):
    """Predict the rating of *userId* for *itemId*.

    The prediction is the user's own mean rating plus the
    similarity-weighted average of the neighbours' deviations from their
    own means.

    Args:
        prefer1: Mapping ``user -> {item: rating}`` used as training data.
        userId: User to predict for; must be a key of *prefer1*.
        itemId: Item to predict.
        knumber: Maximum number of neighbours to use.
        similarity: Callable ``similarity(prefer, user, other)``; it is
            used both to select the neighbours and to weight them.

    Returns:
        The predicted rating, or the user's mean rating when the absolute
        similarities sum to zero (for example, nobody else rated the item).
    """
    # Forward the caller's similarity so that neighbour selection and
    # weighting use the same measure.
    neighbours = topKMatches(prefer1, userId, itemId, k=knumber, sim=similarity)
    user_mean = getAverage(prefer1, userId)

    weighted_deviation = 0.0
    weight_total = 0.0
    for other in neighbours:
        weight = similarity(prefer1, userId, other)
        other_mean = getAverage(prefer1, other)
        weight_total += abs(weight)
        # Weights may be negative, so the sum keeps the sign.
        weighted_deviation += (prefer1[other][itemId] - other_mean) * weight

    if weight_total == 0:
        return user_mean
    return user_mean + weighted_deviation / weight_total


def getAllUserRating(fileTrain='u1.base', fileTest='u1.test',
                     fileResult='result.txt', similarity=sim_pearson):
    """Predict a rating for every (user, item) pair of the test set.

    One ``user<TAB>item<TAB>rating`` line per pair is appended to
    *fileResult*, after a separator line; the number of predictions is
    printed at the end.

    Args:
        fileTrain: Path handed to ``loadMovieLensTrain``.
        fileTest: Path handed to ``loadMovieLensTest``.
        fileResult: Path of the output file, opened in append mode.
        similarity: Similarity callable forwarded to ``getRating``.
    """
    # Imported here so the similarity/prediction helpers above stay usable
    # without the loader module on the import path.
    # NOTE(review): both loaders are presumed to return
    # ``{user: {item: rating}}`` -- confirm against loadMovieLens.
    from loadMovieLens import loadMovieLensTest
    from loadMovieLens import loadMovieLensTrain

    prefer1 = loadMovieLensTrain(fileTrain)
    prefer2 = loadMovieLensTest(fileTest)
    inAllnum = 0

    # ``with`` closes the file even if a prediction raises midway.
    with open(fileResult, 'a') as out:
        out.write("%s\n" % ("------------------------------------------------------"))
        for userid in prefer2:
            for item in prefer2[userid]:
                rating = getRating(prefer1, userid, item, 20, similarity)
                out.write('%s\t%s\t%s\n' % (userid, item, rating))
                inAllnum += 1
    print("-------------completed!! -----------", inAllnum)


if __name__ == "__main__":
    print("\n--------------recommended system in operation ...-----------\n")
    getAllUserRating('u1.base', 'u1.test', 'result.txt')
# Collaborative filtering code -- getrating.py