# coding: utf-8
"""User-based collaborative filtering with a Pearson-correlation similarity.

The module ships a small in-memory sample (``users``) and a ``Recommender``
class that can also load the Book-Crossing CSV dump.

Data link: http://www2.informatik.uni-freiburg.de/~cziegler/BX/  (BX-CSV-Dump.zip)

CSV layout (fields are separated by ';' and wrapped in double quotes):
    BX-Users["User-ID";"Location";"Age"]
    BX-Books["ISBN";"Book-Title";"Book-Author";"Year-Of-Publication";
             "Publisher";"Image-URL-S";"Image-URL-M";"Image-URL-L"]
    BX-Book-Ratings["User-ID";"ISBN";"Book-Rating"]
"""
__author__ = 'similarface'

# codecs is used for decoding the CSV files
import codecs
import os
import sys
from math import sqrt

# Sample ratings: user name -> {artist -> rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0,
                 "Norah Jones": 4.5, "Phoenix": 5.0,
                 "Slightly Stoopid": 1.5, "The Strokes": 2.5,
                 "Vampire Weekend": 2.0},
    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
             "Phoenix": 2.0, "Slightly Stoopid": 3.5,
             "Vampire Weekend": 3.0},
    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
             "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0},
    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
            "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},
    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
               "The Strokes": 4.0, "Vampire Weekend": 1.0},
    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
               "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},
    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
            "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0},
    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
                 "Slightly Stoopid": 2.5, "The Strokes": 3.0},
}


class Recommender:
    """Recommend unseen items to a user from the k most similar users."""

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Set up the recommender.

        :param data: dict mapping user -> {item -> rating}; any non-dict
            value leaves the recommender empty until ``loadbookdb`` is called
        :param k: number of nearest neighbours used by ``recommend``
        :param metric: similarity metric name; only 'pearson' is implemented
            (matched case-insensitively)
        :param n: maximum number of recommendations returned
        :raises ValueError: if ``metric`` is not supported
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        self.metric = metric
        if str(metric).lower() == 'pearson':
            self.fn = self.pearson
        else:
            # Fail here rather than with an AttributeError on self.fn later.
            raise ValueError('unsupported metric: %r' % (metric,))
        self.data = data if isinstance(data, dict) else {}

    def loadbookdb(self, path=''):
        """Load the Book-Crossing dump found in directory ``path``.

        Replaces ``self.data`` with the ratings read from
        BX-Book-Ratings.csv and fills ``productid2name``, ``userid2name``
        and ``username2id`` from BX-Books.csv and BX-Users.csv. Prints the
        total number of lines read across the three files.
        """
        self.data = {}
        i = 0

        # Read the users' book ratings.
        ratings_file = os.path.join(path, 'BX-Book-Ratings.csv')
        with codecs.open(ratings_file, 'r', 'utf-8', errors='ignore') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                if len(fields) < 3:
                    continue  # malformed / truncated line
                user = fields[0].strip('"')
                book = fields[1].strip('"')
                try:
                    rating = int(fields[2].strip().strip('"'))
                except ValueError:
                    continue  # header row or non-numeric rating
                self.data.setdefault(user, {})[book] = rating

        # Read the book information.
        books_file = os.path.join(path, 'BX-Books.csv')
        with codecs.open(books_file, 'r', 'utf8', errors='ignore') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                if len(fields) < 3:
                    continue
                isbn = fields[0].strip('"')
                title = fields[1].strip('"')
                author = fields[2].strip('"')
                self.productid2name[isbn] = title + ' by ' + author

        # Read the user information.
        users_file = os.path.join(path, 'BX-Users.csv')
        with codecs.open(users_file, 'r', 'utf8', errors='ignore') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                if len(fields) < 2:
                    continue
                userid = fields[0].strip('"')
                location = fields[1].strip('"')
                # The users file has exactly three columns, so the age is
                # present whenever a third field exists.
                if len(fields) >= 3:
                    age = fields[2].strip().strip('"')
                else:
                    age = 'NULL'
                if age.upper() != 'NULL':
                    value = location + ' (age: ' + age + ')'
                else:
                    value = location
                self.userid2name[userid] = value
                self.username2id[location] = userid
        print(i)

    def pearson(self, rating1, rating2):
        """Return the Pearson correlation of two rating dicts.

        The Pearson product-moment correlation coefficient (PPMCC, usually
        written r) measures the linear correlation between two variables and
        lies between -1 and 1. Only items present in both dicts are used.
        Rough reading of the magnitude:
            0.8-1.0 very strong, 0.6-0.8 strong, 0.4-0.6 moderate,
            0.2-0.4 weak, 0.0-0.2 very weak or none.

        Returns 0 when there is no common item or when either side has zero
        variance over the common items.
        """
        # Float accumulators keep the divisions below true divisions even
        # when all ratings are ints.
        sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0.0
        n = 0
        for key, x in rating1.items():
            if key in rating2:
                y = rating2[key]
                n += 1
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += x ** 2
                sum_y2 += y ** 2
        if n == 0:
            return 0
        # Clamp at 0: rounding can push a zero variance slightly negative,
        # which would make sqrt() raise.
        var_x = max(sum_x2 - sum_x ** 2 / n, 0.0)
        var_y = max(sum_y2 - sum_y ** 2 / n, 0.0)
        denominator = sqrt(var_x) * sqrt(var_y)
        if denominator == 0:
            return 0
        return (sum_xy - (sum_x * sum_y) / n) / denominator

    def computenearesneighbor(self, username):
        """Return [(other_user, similarity), ...] sorted most-similar first."""
        target = self.data[username]
        distances = [
            (other, self.fn(target, ratings))  # correlation coefficient
            for other, ratings in self.data.items()
            if other != username
        ]
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Return up to ``self.n`` (item name, score) pairs for ``user``.

        Scores are the ratings of the ``self.k`` nearest neighbours, weighted
        by each neighbour's share of the total similarity. Items the user has
        already rated are skipped. Returns an empty list when there are no
        neighbours or their total similarity is 0 (no usable weights).
        """
        # Slicing tolerates k being larger than the number of neighbours.
        neighbours = self.computenearesneighbor(user)[:self.k]
        total_distance = sum((score for _, score in neighbours), 0.0)
        if total_distance == 0:
            return []
        user_ratings = self.data[user]
        recommendations = {}
        for name, score in neighbours:
            weight = score / total_distance
            # Walk through the items rated by this similar user ...
            for item, rating in self.data[name].items():
                # ... keeping only those the target user has not rated.
                if item not in user_ratings:
                    recommendations[item] = (
                        recommendations.get(item, 0.0) + rating * weight)
        ranked = [(self.convertproductid2name(item), score)
                  for item, score in recommendations.items()]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked[:self.n]

    def convertproductid2name(self, id):
        """Return the product name for ``id``, or ``id`` itself if unknown."""
        return self.productid2name.get(id, id)

    def userratings(self, id, n):
        """Print the ``n`` highest ratings given by user ``id``.

        :param id: user id, a key of both ``self.data`` and
            ``self.userid2name``
        :param n: number of ratings to print
        :return: None
        """
        print("Ratings for " + self.userid2name[id])
        ratings = self.data[id]
        print(len(ratings))
        ranked = [(self.convertproductid2name(k), v)
                  for k, v in ratings.items()]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        for title, score in ranked[:n]:
            print("%s\t%i" % (title, score))


if __name__ == '__main__':
    r = Recommender(users)
    print(r.recommend('Veronica'))
    # Directory holding the extracted BX-CSV-Dump. The default is the
    # author's machine-specific location; pass another one as argv[1].
    if len(sys.argv) > 1:
        data_dir = sys.argv[1]
    else:
        data_dir = u'd:/360 safe browsing Download/bx-csv-dump'
    r.loadbookdb(data_dir)
    print(r.recommend('276737'))
# Result (output as posted by the author):
# [('Blues Traveler', 5.0)]
# 1700021
# [(u"Devil's Waltz (Alex Delaware Novels (Paperback)) by Jonathan Kellerman", 9.0),
#  (u'Silent Partner (Alex Delaware Novels (Paperback)) by Jonathan Kellerman', 8.0),
#  (u'The Outsiders (Now in Speak!) by S. E. Hinton', 8.0),
#  (u'Sein Language by JERRY SEINFELD', 8.0),
#  (u'The Girl Who Loved Tom Gordon by Stephen King', 8.0)]
#
# Collaborative filtering in data mining