Collaborative filtering in data mining

Source: Internet
Author: User

# coding: utf-8
"""User-based collaborative filtering on the Book-Crossing (BX) data set.

Data: http://www2.informatik.uni-freiburg.de/~cziegler/bx/BX-CSV-Dump.zip

Tables in the dump (semicolon separated, values wrapped in double quotes):

    BX-Users["User-ID";"Location";"Age"]
    BX-Books["ISBN";"Book-Title";"Book-Author";"Year-Of-Publication";
             "Publisher";"Image-URL-S";"Image-URL-M";"Image-URL-L"]
    BX-Book-Ratings["User-ID";"ISBN";"Book-Rating"]
"""
__author__ = 'similarface'

import codecs  # kept from the original module (was used for encoding conversion)
import io
import os
import sys
from math import sqrt

# Small in-memory sample: user -> {artist: rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0,
                 "Norah Jones": 4.5, "Phoenix": 5.0,
                 "Slightly Stoopid": 1.5, "The Strokes": 2.5,
                 "Vampire Weekend": 2.0},
    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
             "Phoenix": 2.0, "Slightly Stoopid": 3.5,
             "Vampire Weekend": 3.0},
    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
             "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0},
    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
            "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},
    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
               "The Strokes": 4.0, "Vampire Weekend": 1.0},
    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
               "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},
    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
            "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0},
    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
                 "Slightly Stoopid": 2.5, "The Strokes": 3.0},
}

# File names inside the BX-CSV-Dump archive.
_RATINGS_FILE = 'BX-Book-Ratings.csv'
_BOOKS_FILE = 'BX-Books.csv'
_USERS_FILE = 'BX-Users.csv'

# Data directory used by the demo when none is given on the command line.
_DEFAULT_DATA_DIR = u'd:/360 safe browsing Download/bx-csv-dump'


def _open_table(path, filename):
    """Open one BX table as text, silently dropping undecodable bytes."""
    return io.open(os.path.join(path, filename), 'r',
                   encoding='utf-8', errors='ignore')


class Recommender(object):
    """k-nearest-neighbour recommender driven by Pearson correlation."""

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Create a recommender.

        Args:
            data: dict mapping user -> {item: rating}. Any non-dict value
                leaves the recommender empty until ``loadbookdb`` is called.
            k: number of nearest neighbours used for a recommendation.
            metric: similarity measure; only 'pearson' is supported
                (matched ignoring case and surrounding whitespace).
            n: maximum number of recommendations returned.

        Raises:
            ValueError: if ``metric`` is not a supported measure.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        self.metric = metric
        if str(metric).strip().lower() == 'pearson':
            self.fn = self.pearson
        else:
            # Fail here rather than with an AttributeError on first use.
            raise ValueError('unsupported metric: %r' % (metric,))
        self.data = data if isinstance(data, dict) else {}

    def loadbookdb(self, path=''):
        """Load the Book-Crossing tables found in directory ``path``.

        Replaces ``self.data`` with the ratings table and fills
        ``productid2name``, ``userid2name`` and ``username2id``. Rows are
        split on ';' exactly as before (no CSV quoting rules are applied).
        Prints the total number of lines read across the three files.

        Raises:
            IOError/OSError: if one of the three files cannot be opened.
        """
        self.data = {}
        i = 0

        # Ratings: user id -> {isbn: rating}.
        with _open_table(path, _RATINGS_FILE) as f:
            for line in f:
                i += 1
                fields = line.split(';')
                if len(fields) < 3:
                    continue  # blank or truncated row
                user = fields[0].strip('"')
                book = fields[1].strip('"')
                try:
                    rating = int(fields[2].strip().strip('"'))
                except ValueError:
                    continue  # header row or non-numeric rating
                self.data.setdefault(user, {})[book] = rating

        # Books: isbn -> "<title> by <author>".
        with _open_table(path, _BOOKS_FILE) as f:
            for line in f:
                i += 1
                fields = line.split(';')
                if len(fields) < 3:
                    continue
                isbn = fields[0].strip('"')
                title = fields[1].strip('"')
                author = fields[2].strip().strip('"')
                self.productid2name[isbn] = title + ' by ' + author

        # Users: id -> "<location> (age: <age>)" and location -> id.
        with _open_table(path, _USERS_FILE) as f:
            for line in f:
                i += 1
                fields = line.split(';')
                if len(fields) < 2:
                    continue
                userid = fields[0].strip('"')
                location = fields[1].strip().strip('"')
                # The users table has exactly three columns, so the age is
                # the third field; a missing age is stored as NULL.
                if len(fields) > 2:
                    age = fields[2].strip().strip('"')
                else:
                    age = 'NULL'
                if age and age.upper() != 'NULL':
                    value = location + ' (age: ' + age + ')'
                else:
                    value = location
                self.userid2name[userid] = value
                self.username2id[location] = userid

        print(i)

    def pearson(self, rating1, rating2):
        """Return the Pearson correlation of two rating dicts.

        The Pearson product-moment correlation coefficient measures the
        linear correlation between two variables and lies in [-1, 1].
        Rough reading of the magnitude: 0.8-1.0 very strong, 0.6-0.8
        strong, 0.4-0.6 moderate, 0.2-0.4 weak, 0.0-0.2 very weak or none.

        Only items rated in both dicts are used. Returns 0 when there is
        no common item or when either side has no variance.
        """
        sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0.0
        n = 0
        for key, x in rating1.items():
            if key in rating2:
                y = rating2[key]
                n += 1
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += x ** 2
                sum_y2 += y ** 2
        if n == 0:
            return 0
        count = float(n)  # true division even for integer ratings
        var_x = sum_x2 - sum_x ** 2 / count
        var_y = sum_y2 - sum_y ** 2 / count
        # Rounding can push a zero variance slightly negative; sqrt would
        # then raise, so treat anything non-positive as "no variance".
        if var_x <= 0 or var_y <= 0:
            return 0
        return (sum_xy - sum_x * sum_y / count) / (sqrt(var_x) * sqrt(var_y))

    def computenearesneighbor(self, username):
        """Return [(other_user, correlation), ...], most similar first.

        Raises:
            KeyError: if ``username`` has no ratings in ``self.data``.
        """
        own_ratings = self.data[username]
        neighbours = [(other, self.fn(own_ratings, self.data[other]))
                      for other in self.data if other != username]
        neighbours.sort(key=lambda pair: pair[1], reverse=True)
        return neighbours

    def recommend(self, user):
        """Return up to ``self.n`` (item name, score) pairs for ``user``.

        Scores are the ratings of the ``self.k`` nearest neighbours, each
        weighted by that neighbour's share of the summed correlation.
        Items the user has already rated are skipped. Returns an empty
        list when the neighbours' correlations sum to zero.

        Raises:
            KeyError: if ``user`` has no ratings in ``self.data``.
        """
        # Slicing tolerates k being larger than the number of neighbours.
        nearest = self.computenearesneighbor(user)[:self.k]
        user_ratings = self.data[user]
        total = sum((score for _, score in nearest), 0.0)
        if total == 0:
            return []  # no usable weights; avoids division by zero

        recommendations = {}
        for name, score in nearest:
            weight = score / total
            # Walk the items rated by this neighbour ...
            for item, rating in self.data[name].items():
                # ... keeping only those the target user has not rated.
                if item not in user_ratings:
                    recommendations[item] = (recommendations.get(item, 0)
                                             + rating * weight)

        ranked = [(self.convertproductid2name(item), value)
                  for item, value in recommendations.items()]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked[:self.n]

    def convertproductid2name(self, id):
        """Return the product name for ``id``, or ``id`` itself if unknown."""
        return self.productid2name.get(id, id)

    def userratings(self, id, n):
        """Print the ``n`` highest ratings given by the user ``id``.

        Raises:
            KeyError: if ``id`` is missing from ``userid2name`` or ``data``.
        """
        print("Ratings for " + self.userid2name[id])
        ratings = self.data[id]
        print(len(ratings))
        ranked = [(self.convertproductid2name(k), v)
                  for k, v in ratings.items()]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        for title, score in ranked[:n]:
            print("%s\t%i" % (title, score))


def main():
    """Demo: recommend from the sample data, then from the BX dump.

    The BX directory may be passed as the first command-line argument.
    """
    r = Recommender(users)
    print(r.recommend('Veronica'))
    data_dir = sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_DATA_DIR
    r.loadbookdb(data_dir)
    print(r.recommend('276737'))


if __name__ == '__main__':
    main()

 

#result:
[('Blues Traveler', 5.0)]
1700021
[(u"Devil's Waltz (Alex Delaware Novels (Paperback)) by Jonathan Kellerman", 9.0), (u'Silent Partner (Alex Delaware Novels (Paperback)) by Jonathan Kellerman', 8.0), (u'The Outsiders (Now in Speak!) by S. E. Hinton', 8.0), (u'Sein Language by JERRY SEINFELD', 8.0), (u'The Girl Who Loved Tom Gordon by Stephen King', 8.0)]

Collaborative filtering in data mining

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.