// The code below has a problem: it fails with an error at run time. I am looking for a solution - could the error be caused by the data set?
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <queue>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Compile-time sizes used for the global tables in this file.
const int iterm_size=1682;  // number of items ("iterms"): second dimension of the rating matrix
const int user_size=943;    // number of users: first dimension of the rating matrix
const int v=15;             // number of nearest neighbours kept for each item
const int s=10;             // number of nearest neighbours kept for each user
// An (index, score) pair. The neighbour tables store one of these per
// neighbour, and cmp orders them by `value`.
struct mypair{
    int id;        // index of the item or user this entry refers to
    double value;  // score attached to that index (the field cmp compares)

    // Both arguments default to 0, so arrays of mypair can be default-constructed.
    mypair(int i=0,double v=0):id(i),value(v){}
};
struct cmp{
BOOL Operator () (const Mypair & obj1,const Mypair & obj2) const{
return Obj1.value < Obj2.value;
}
};
Double rate[user_size][iterm_size]; Scoring matrix
Mypair Nbi[iterm_size][v]; The nearest neighbor that holds each iterm
Mypair Nbu[user_size][s]; Store the nearest neighbor for each user
Double rate_avg[user_size]; Average ratings per user
Read the scoring matrix from the file
int readrate (string filename) {
Ifstream IFS;
Ifs.open (Filename.c_str ());
if (!ifs) {
cerr<< "error:unable to open input file" <<filename<<endl;
return-1;
}
String line;
while (Getline (Ifs,line)) {
String Str1,str2,str3;
Istringstream strstm (line);
strstm>>str1>>str2>>str3;
int Userid=atoi (STR1.C_STR ());
int Itermid=atoi (STR2.C_STR ());
Double Rating=atof (Str3.c_str ());
rate[userid-1][itermid-1]=rating;
Line.clear ();
}
Ifs.close ();
return 0;
}
Calculate the average score per user
void Getavgrate () {
for (int i=0;i<user_size;++i) {
Double sum=0;
for (int j=0;j<iterm_size;++j)
SUM+=RATE[I][J];
Rate_avg[i]=sum/iterm_size;
}
}
// Returns the Pearson correlation coefficient of two equally long vectors.
//
// vec1, vec2: the samples; they must have the same length (asserted).
// Returns a value in [-1, 1] up to rounding, or 0 when the correlation is
// undefined, i.e. when the vectors are empty or at least one of them is
// constant. Sparse rating data produces such inputs regularly, so they are
// reported as "no similarity" instead of aborting the program.
double Getsim(const std::vector<double>& vec1,const std::vector<double>& vec2){
    assert(vec1.size()==vec2.size());
    const std::size_t len=vec1.size();
    if(len==0)
        return 0;

    // First pass: the two means.
    double sum1=0;
    double sum2=0;
    for(std::size_t i=0;i<len;++i){
        sum1+=vec1[i];
        sum2+=vec2[i];
    }
    const double ex=sum1/len;
    const double ey=sum2/len;

    // Second pass: centred sums. Working on deviations from the mean keeps
    // the variance terms non-negative, which E[x^2]-E[x]^2 does not guarantee
    // in floating point.
    double sxx=0;
    double syy=0;
    double sxy=0;
    for(std::size_t i=0;i<len;++i){
        const double dx=vec1[i]-ex;
        const double dy=vec2[i]-ey;
        sxx+=dx*dx;
        syy+=dy*dy;
        sxy+=dx*dy;
    }

    const double denom=std::sqrt(sxx*syy);
    if(!(denom>0))  // also true when denom is NaN
        return 0;
    return sxy/denom;
}
Calculates the nearest neighbor of each iterm
void Getnbi () {
for (int i=0;i<iterm_size;++i) {
Vector<double> VEC1;
Priority_queue<mypair,vector<mypair>,cmp> neighbour;
for (int k=0;k<user_size;k++)
Vec1.push_back (Rate[k][i]);
for (int j=0;j<iterm_size;j++) {
if (I==J)
Continue
Vector<double> vec2;
for (int k=0;k<user_size;k++)
Vec2.push_back (Rate[k][j]);
Double Sim=getsim (VEC1,VEC2);
Mypair p (J,sim);
Neighbour.push (P);
}
for (int n=0;n<v;++n) {
Nbi[i][n]=neighbour.top ();
Neighbour.pop ();
}
}
}
Predict the user's scoring value for a non-rated item
Double getpredict (const vector<double> &user,int index) {
Double sum1=0;
Double sum2=0;
for (int i=0;i<v;++i) {
int neib_index=nbi[index][i].id;
Double Neib_sim=nbi[index][i].value;
Sum1+=neib_sim*user[neib_index];
Sum2+=fabs (Neib_sim);
}
return sum1/sum2;
}
Calculate the similarity of two users
Double Getusersim (const vector<double> &user1,const vector<double> &user2) {
Vector<double> VEC1;
Vector<double> vec2;
int len=user1.size ();
ASSERT (Len==user2.size ());
for (int i=0;i<len;++i) {
if (user1[i]!=0 | | user2[i]!=0) {
if (user1[i]!=0)
Vec1.push_back (User1[i]);
Else
Vec1.push_back (Getpredict (user1,i));
if (user2[i]!=0)
Vec2.push_back (User2[i]);
Else
Vec2.push_back (Getpredict (user2,i));
}
}
Return Getsim (VEC1,VEC2);
}
Calculates the nearest neighbor of each user
void Getnbu () {
for (int i=0;i<user_size;++i) {
Vector<double> user1;
Priority_queue<mypair,vector<mypair>,cmp> neighbour;
for (int k=0;k<iterm_size;++k)
User1.push_back (Rate[i][k]);
for (int j=0;j<user_size;++j) {
if (j==i)
Continue
Vector<double> User2;
for (int k=0;k<iterm_size;++k)
User2.push_back (Rate[j][k]);
Double Sim=getusersim (USER1,USER2);
Mypair p (J,sim);
Neighbour.push (P);
}
for (int m=0;m<s;++m) {
Nbu[i][m]=neighbour.top ();
Neighbour.pop ();
}
}
}
Generate a recommendation to predict a user's rating of an item
Double predictrate (int user,int iterm) {
Double sum1=0;
Double sum2=0;
for (int i=0;i<s;++i) {
int neib_index=nbu[user][i].id;
Double Neib_sim=nbu[user][i].value;
sum1+=neib_sim* (Rate[neib_index][iterm]-rate_avg[neib_index]);
Sum2+=fabs (Neib_sim);
}
return rate_avg[user]+sum1/sum2;
}
Test
int main () {
String file= "/home/orisun/dataset/movie-lens-100k/u.data";
String file= "E:\\c++programs1\\ml-100k\\ml-100k\\u.data";
if (readrate (file)!=0) {
return-1;
}
Getavgrate ();
Getnbi ();
Getnbu ();
while (1) {
cout<< "Please input user index and ITERM index which want predict" <<endl;
int user,iterm;
cin>>user>>iterm;
Cout<<predictrate (user,iterm) <<endl;
}
return 0;
}
// Collaborative filtering algorithm reference code