Input data format:
Each line holds one user's purchase record: uid pid1 pid2 ...
/*
 * Multi-threaded user-based KNN.
 *
 * Reads one purchase record per line ("uid pid1 pid2 ..."), keeps the users
 * that bought more than MIN_ITEMS items, and for every kept user writes the
 * SIMILARITY_USER most similar other users, where
 *
 *     similarity(i, j) = common(i, j) / sqrt(|items_i| * |items_j|)
 *
 * The users are split into num_threads contiguous ranges; each worker writes
 * its own part file and filejoin() concatenates the parts into Output.txt.
 *
 * NOTE(review): several numeric literals were lost from the published
 * listing. Values marked "assumed" are reconstructions - confirm them
 * against the original program.
 */
#include <math.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

enum {
    MAXSTRING = 100,                /* assumed: user id buffer size, incl. NUL */
    MAXUSERSIZE = 1024 * 1024 * 10, /* longest accepted input line, incl. NUL */
    SIMILARITY_USER = 20,           /* assumed: neighbours kept per user (K) */
    NUM_THREADS = 20,               /* one worker per part file listed below */
    INITIAL_USER_CAPACITY = 1000,   /* assumed: initial size of the user table */
    MIN_ITEMS = 10,                 /* users need MORE than this many items */
    MIN_COMMON_ITEMS = 10           /* pairs need MORE than this many shared items */
};

/* Part file written by worker t is filename[t]. */
const char *const filename[NUM_THREADS] = {
    "Output1.txt",  "Output2.txt",  "Output3.txt",  "Output4.txt",
    "Output5.txt",  "Output6.txt",  "Output7.txt",  "Output8.txt",
    "Output9.txt",  "Output10.txt", "Output11.txt", "Output12.txt",
    "Output13.txt", "Output14.txt", "Output15.txt", "Output16.txt",
    "Output17.txt", "Output18.txt", "Output19.txt", "Output20.txt"
};

struct userinfo {
    char userid[MAXSTRING];
    int totalitem;                  /* number of entries in itemlist */
    unsigned long long *itemlist;   /* item ids, sorted ascending */
};

struct userinfo *user;              /* user table, usernum valid entries */
int max_user = INITIAL_USER_CAPACITY;
int usernum = 0;
int num_threads = NUM_THREADS;

/* Largest item id seen while loading, and the user that owns it. */
unsigned long long maxitemid = 0;
int maxitemidindex;
char maxitemiduserid[MAXSTRING];

/* Address returned by a worker thread to signal failure. */
static char thread_failed;

int binary_search(const unsigned long long array[], int n,
                  unsigned long long value);

/*
 * qsort comparator for unsigned long long, ascending.
 *
 * Compares instead of subtracting: a 64-bit difference truncated to int can
 * have the wrong sign (or be 0) for ids that differ by 2^31 or more.
 */
int compare123(const void *a, const void *b)
{
    const unsigned long long lhs = *(const unsigned long long *)a;
    const unsigned long long rhs = *(const unsigned long long *)b;

    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Binary search in an ascending array.
 *
 * Returns the index of an element equal to value, or -1 when absent
 * (including when n <= 0).
 */
int binary_search(const unsigned long long array[], int n,
                  unsigned long long value)
{
    int left = 0;
    int right = n - 1;

    while (left <= right) {
        /* left + half the gap rather than (left + right) / 2: no overflow. */
        const int middle = left + ((right - left) >> 1);

        if (array[middle] > value) {
            right = middle - 1;
        } else if (array[middle] < value) {
            left = middle + 1;
        } else {
            return middle;
        }
    }
    return -1;
}

/*
 * Worker thread: computes the top-K neighbours for one range of users.
 *
 * a carries the worker index (0 .. num_threads-1) cast to void *.
 * Returns NULL on success and a non-NULL pointer when the part file could
 * not be written.
 *
 * All scratch state is local to the thread; the shared user table is only
 * read here, so the workers need no locking.
 */
void *calusersim(void *a)
{
    const int tid = (int)(intptr_t)a;
    FILE *fout = fopen(filename[tid], "w");
    if (fout == NULL) {
        perror(filename[tid]);
        return &thread_failed;
    }

    float bestsim[SIMILARITY_USER];
    char bestuserid[SIMILARITY_USER][MAXSTRING];

    /* Equal-sized ranges; the last worker also takes the remainder. */
    const int chunk = usernum / num_threads;
    const int left = chunk * tid;
    const int right = (tid == num_threads - 1) ? usernum - 1
                                               : chunk * (tid + 1) - 1;

    for (int i = left; i <= right; ++i) {
        /* -1 marks an empty slot; real similarities are always positive. */
        for (int w = 0; w < SIMILARITY_USER; ++w) {
            bestsim[w] = -1.0f;
        }
        memset(bestuserid, 0, sizeof bestuserid);

        for (int j = 0; j < usernum; ++j) {
            if (i == j) {
                continue;
            }

            /* Count items of user i that user j also bought. */
            int common = 0;
            for (int t = 0; t < user[i].totalitem; ++t) {
                if (binary_search(user[j].itemlist, user[j].totalitem,
                                  user[i].itemlist[t]) != -1) {
                    common++;
                }
            }
            if (common <= MIN_COMMON_ITEMS) {
                continue;
            }

            /* Product taken in double so large item counts cannot overflow. */
            const double norm =
                sqrt((double)user[i].totalitem * (double)user[j].totalitem);
            const float similarity = (float)(common / norm);

            /* Insert into the descending top-K list, dropping the last entry. */
            for (int k = 0; k < SIMILARITY_USER; ++k) {
                if (similarity > bestsim[k]) {
                    const size_t tail = (size_t)(SIMILARITY_USER - 1 - k);
                    if (tail > 0) {
                        memmove(&bestsim[k + 1], &bestsim[k],
                                tail * sizeof bestsim[0]);
                        memmove(&bestuserid[k + 1], &bestuserid[k],
                                tail * sizeof bestuserid[0]);
                    }
                    bestsim[k] = similarity;
                    memcpy(bestuserid[k], user[j].userid, sizeof bestuserid[k]);
                    break;
                }
            }
        }

        /* NOTE(review): the field separators were lost from the listing;
         * tabs are assumed here - confirm the expected output format. */
        for (int c = 0; c < SIMILARITY_USER && bestsim[c] >= 0.0f; ++c) {
            fprintf(fout, "%s\t%s\t%f\n",
                    user[i].userid, bestuserid[c], bestsim[c]);
        }
    }

    if (fclose(fout) != 0) {
        perror(filename[tid]);
        return &thread_failed;
    }
    return NULL;
}

/* Count the space-separated tokens in s. */
static int count_tokens(const char *s)
{
    int count = 0;

    for (;;) {
        s += strspn(s, " ");
        if (*s == '\0') {
            return count;
        }
        count++;
        s += strcspn(s, " ");
    }
}

/*
 * Parse an item token of len characters: one prefix character (skipped, as
 * in the original, which started reading at the second character) followed
 * by decimal digits.
 *
 * Stores the number in *out and returns 1, or returns 0 when the token has
 * no digits or contains a non-digit after the prefix.
 */
static int parse_item_id(const char *tok, size_t len, unsigned long long *out)
{
    if (len < 2) {
        return 0;
    }

    unsigned long long id = 0;
    for (size_t k = 1; k < len; ++k) {
        if (tok[k] < '0' || tok[k] > '9') {
            return 0;
        }
        id = id * 10 + (unsigned long long)(tok[k] - '0');
    }
    *out = id;
    return 1;
}

/*
 * Load Data_5k.txt into the global user table.
 *
 * Only users with more than MIN_ITEMS items are kept; each kept user's item
 * list is sorted ascending so binary_search() can be used on it. Records
 * with a malformed item token are skipped with a warning. Sets usernum.
 *
 * Returns 0. Exits the process on allocation failure, on a missing input
 * file, or on a line that does not fit in MAXUSERSIZE bytes.
 */
int readuserinfo(void)
{
    char *line = malloc(MAXUSERSIZE);
    if (line == NULL) {
        printf("Str Allocate failed.\n");
        exit(1);
    }

    FILE *fin = fopen("Data_5k.txt", "rb");
    if (fin == NULL) {
        printf("The input file doesn't exist.\n");
        exit(1);
    }

    user = malloc((size_t)max_user * sizeof *user);
    if (user == NULL) {
        printf("User Allocate failed.\n");
        exit(1);
    }

    int num = 0;
    while (fgets(line, MAXUSERSIZE, fin) != NULL) {
        /* Strip the line ending; a final line without '\n' is still valid. */
        const size_t eol = strcspn(line, "\r\n");
        if (line[eol] == '\0' && !feof(fin)) {
            printf("Input line longer than %d bytes.\n", MAXUSERSIZE - 1);
            exit(1);
        }
        line[eol] = '\0';

        /* First token is the user id, the rest are item tokens. */
        const char *uid = line + strspn(line, " ");
        const size_t uid_len = strcspn(uid, " ");
        const char *items = uid + uid_len;

        const int item_count = count_tokens(items);
        if (item_count <= MIN_ITEMS) {
            continue;
        }

        if (num >= max_user) {
            /* Grow via a temporary so the old table survives a failure. */
            const int grown = max_user * 2;
            struct userinfo *tmp = realloc(user, (size_t)grown * sizeof *user);
            if (tmp == NULL) {
                printf("Realloc user memory failed.\n");
                exit(1);
            }
            user = tmp;
            max_user = grown;
        }

        struct userinfo *u = &user[num];
        u->itemlist = malloc((size_t)item_count * sizeof *u->itemlist);
        if (u->itemlist == NULL) {
            printf("Allocate itemList failed.\n");
            exit(1);
        }
        u->totalitem = item_count;
        /* Bounded copy: over-long ids are truncated, never overflowed. */
        snprintf(u->userid, sizeof u->userid, "%.*s", (int)uid_len, uid);

        const char *cursor = items;
        int well_formed = 1;
        for (int i = 0; i < item_count; ++i) {
            cursor += strspn(cursor, " ");
            const size_t tok_len = strcspn(cursor, " ");
            if (!parse_item_id(cursor, tok_len, &u->itemlist[i])) {
                well_formed = 0;
                break;
            }
            cursor += tok_len;
        }
        if (!well_formed) {
            fprintf(stderr, "Skipping malformed record for user %s\n",
                    u->userid);
            free(u->itemlist);
            u->itemlist = NULL;
            continue;
        }

        qsort(u->itemlist, (size_t)item_count, sizeof *u->itemlist, compare123);

        /* After sorting, the user's largest id is the last element. */
        const unsigned long long top = u->itemlist[item_count - 1];
        if (top > maxitemid) {
            maxitemid = top;
            maxitemidindex = num;
            memcpy(maxitemiduserid, u->userid, sizeof maxitemiduserid);
        }

        num++;
    }

    usernum = num;
    fclose(fin);
    free(line);
    return 0;
}

/*
 * Concatenate the per-thread part files into Output.txt, in worker order,
 * and delete each part after it has been copied. A part that cannot be
 * opened is reported and skipped.
 */
void filejoin(void)
{
    FILE *fout = fopen("Output.txt", "w");
    if (fout == NULL) {
        perror("Output.txt");
        exit(1);
    }

    char buf[4096];
    for (int i = 0; i < num_threads; ++i) {
        FILE *fin = fopen(filename[i], "r");
        if (fin == NULL) {
            perror(filename[i]);
            continue;
        }

        size_t got;
        while ((got = fread(buf, 1, sizeof buf, fin)) > 0) {
            if (fwrite(buf, 1, got, fout) != got) {
                perror("Output.txt");
                exit(1);
            }
        }
        fclose(fin);

        if (remove(filename[i]) == 0) {
            printf("Removed %s\n", filename[i]);
        } else {
            perror("Remove");
        }
    }

    if (fclose(fout) != 0) {
        perror("Output.txt");
        exit(1);
    }
}

/*
 * Load the data, run one worker per part file, report the elapsed time and
 * merge the parts. Returns non-zero when any worker failed.
 */
int main(void)
{
    /* Wall-clock time: clock() would add up CPU time across all workers. */
    const time_t start = time(NULL);

    readuserinfo();

    pthread_t *pt = malloc((size_t)num_threads * sizeof *pt);
    if (pt == NULL) {
        printf("Thread Allocate failed.\n");
        exit(1);
    }

    for (int t = 0; t < num_threads; t++) {
        /* The worker index travels inside the pointer value itself. */
        const int rc = pthread_create(&pt[t], NULL, calusersim,
                                      (void *)(intptr_t)t);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
            exit(1);
        }
    }

    int failed = 0;
    for (int t = 0; t < num_threads; t++) {
        void *status = NULL;
        if (pthread_join(pt[t], &status) != 0 || status != NULL) {
            failed = 1;
        }
    }
    free(pt);

    printf("Time: %.0f second\n", difftime(time(NULL), start));

    for (int i = 0; i < usernum; ++i) {
        free(user[i].itemlist);
    }
    free(user);
    user = NULL;

    if (failed) {
        fprintf(stderr, "At least one worker failed; parts were not merged.\n");
        return EXIT_FAILURE;
    }

    filejoin();
    return 0;
}
A C implementation of the multi-threaded version of USER_KNN (user-based k-nearest neighbours)