Collaborative filtering algorithm reference code

Source: Internet
Author: User

The code has a problem: it fails with an error at run time. Looking for a solution — could the data set be at fault?

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <queue>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// Dimensions of the rating matrix and sizes of the neighbour lists.
const int iterm_size=1682;  // number of items (second dimension of the rating matrix)
const int user_size=943;    // number of users (first dimension of the rating matrix)
const int v=15;             // number of nearest neighbours kept for each item
const int s=10;             // number of nearest neighbours kept for each user

// An (id, value) record. The rest of the file stores a neighbour's index in
// `id` and its similarity score in `value`.
struct mypair{
    int id;
    double value;
    // Both fields default to 0 so arrays of mypair can be default-constructed.
    mypair(int i=0,double val=0):id(i),value(val){}
};

struct cmp{
BOOL Operator () (const Mypair & obj1,const Mypair & obj2) const{
return Obj1.value < Obj2.value;
}
};

Double rate[user_size][iterm_size]; Scoring matrix
Mypair Nbi[iterm_size][v]; The nearest neighbor that holds each iterm
Mypair Nbu[user_size][s]; Store the nearest neighbor for each user
Double rate_avg[user_size]; Average ratings per user

Read the scoring matrix from the file
int readrate (string filename) {
Ifstream IFS;
Ifs.open (Filename.c_str ());
if (!ifs) {
cerr<< "error:unable to open input file" <<filename<<endl;
return-1;
}
String line;
while (Getline (Ifs,line)) {
String Str1,str2,str3;
Istringstream strstm (line);
strstm>>str1>>str2>>str3;
int Userid=atoi (STR1.C_STR ());
int Itermid=atoi (STR2.C_STR ());
Double Rating=atof (Str3.c_str ());
rate[userid-1][itermid-1]=rating;
Line.clear ();
}
Ifs.close ();
return 0;
}

Calculate the average score per user
void Getavgrate () {
for (int i=0;i<user_size;++i) {
Double sum=0;
for (int j=0;j<iterm_size;++j)
SUM+=RATE[I][J];
Rate_avg[i]=sum/iterm_size;
}
}

// Calculate the Pearson correlation coefficient of two vectors.
//
// vec1 and vec2 must have the same length (checked with assert).
//
// Returns 0 when the correlation is undefined: for empty input, or when
// either vector has zero variance (all elements equal). Rounding can push a
// tiny variance slightly below zero, so non-positive variances are treated
// the same way instead of taking the square root of a negative number.
double Getsim(const std::vector<double>& vec1,const std::vector<double>& vec2){
    const std::vector<double>::size_type len=vec1.size();
    assert(len==vec2.size());
    if(len==0)
        return 0;

    double sum1=0;    // sum of x
    double sum2=0;    // sum of y
    double sum1_1=0;  // sum of x*x
    double sum2_2=0;  // sum of y*y
    double sum=0;     // sum of x*y
    for(std::vector<double>::size_type i=0;i<len;++i){
        sum+=vec1[i]*vec2[i];
        sum1+=vec1[i];
        sum2+=vec2[i];
        sum1_1+=vec1[i]*vec1[i];
        sum2_2+=vec2[i]*vec2[i];
    }

    const double ex=sum1/len;
    const double ey=sum2/len;
    const double ex2=sum1_1/len;
    const double ey2=sum2_2/len;
    const double exy=sum/len;

    const double varx=ex2-ex*ex;
    const double vary=ey2-ey*ey;
    if(varx<=0 || vary<=0)
        return 0;

    return (exy-ex*ey)/(std::sqrt(varx)*std::sqrt(vary));
}

Calculates the nearest neighbor of each iterm
void Getnbi () {
for (int i=0;i<iterm_size;++i) {
Vector<double> VEC1;
Priority_queue<mypair,vector<mypair>,cmp> neighbour;
for (int k=0;k<user_size;k++)
Vec1.push_back (Rate[k][i]);
for (int j=0;j<iterm_size;j++) {
if (I==J)
Continue
Vector<double> vec2;
for (int k=0;k<user_size;k++)
Vec2.push_back (Rate[k][j]);
Double Sim=getsim (VEC1,VEC2);
Mypair p (J,sim);
Neighbour.push (P);
}
for (int n=0;n<v;++n) {
Nbi[i][n]=neighbour.top ();
Neighbour.pop ();
}
}
}

Predict the user's scoring value for a non-rated item
Double getpredict (const vector<double> &user,int index) {
Double sum1=0;
Double sum2=0;
for (int i=0;i<v;++i) {
int neib_index=nbi[index][i].id;
Double Neib_sim=nbi[index][i].value;
Sum1+=neib_sim*user[neib_index];
Sum2+=fabs (Neib_sim);
}
return sum1/sum2;
}

Calculate the similarity of two users
Double Getusersim (const vector<double> &user1,const vector<double> &user2) {
Vector<double> VEC1;
Vector<double> vec2;
int len=user1.size ();
ASSERT (Len==user2.size ());
for (int i=0;i<len;++i) {
if (user1[i]!=0 | | user2[i]!=0) {
if (user1[i]!=0)
Vec1.push_back (User1[i]);
Else
Vec1.push_back (Getpredict (user1,i));
if (user2[i]!=0)
Vec2.push_back (User2[i]);
Else
Vec2.push_back (Getpredict (user2,i));
}
}
Return Getsim (VEC1,VEC2);
}

Calculates the nearest neighbor of each user
void Getnbu () {
for (int i=0;i<user_size;++i) {
Vector<double> user1;
Priority_queue<mypair,vector<mypair>,cmp> neighbour;
for (int k=0;k<iterm_size;++k)
User1.push_back (Rate[i][k]);
for (int j=0;j<user_size;++j) {
if (j==i)
Continue
Vector<double> User2;
for (int k=0;k<iterm_size;++k)
User2.push_back (Rate[j][k]);
Double Sim=getusersim (USER1,USER2);
Mypair p (J,sim);
Neighbour.push (P);
}
for (int m=0;m<s;++m) {
Nbu[i][m]=neighbour.top ();
Neighbour.pop ();
}
}
}

Generate a recommendation to predict a user's rating of an item
Double predictrate (int user,int iterm) {
Double sum1=0;
Double sum2=0;
for (int i=0;i<s;++i) {
int neib_index=nbu[user][i].id;
Double Neib_sim=nbu[user][i].value;
sum1+=neib_sim* (Rate[neib_index][iterm]-rate_avg[neib_index]);
Sum2+=fabs (Neib_sim);
}
return rate_avg[user]+sum1/sum2;
}

Test
int main () {
String file= "/home/orisun/dataset/movie-lens-100k/u.data";
String file= "E:\\c++programs1\\ml-100k\\ml-100k\\u.data";
if (readrate (file)!=0) {
return-1;
}
Getavgrate ();
Getnbi ();
Getnbu ();
while (1) {
cout<< "Please input user index and ITERM index which want predict" <<endl;
int user,iterm;
cin>>user>>iterm;
Cout<<predictrate (user,iterm) <<endl;
}
return 0;
}

Collaborative filtering algorithm reference code

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.