The C language implementation of the multi-threaded version of ITEM_KNN

Source: Internet
Author: User

The item lookup borrows an idea from word2vec: item IDs are indexed through a hash table with linear probing.

/*
 * Multi-threaded item-based KNN.
 *
 * Reads one purchase record per line (a user id followed by the item ids that
 * user bought), builds for every item the list of users who bought it, and
 * then, in parallel, writes for every item its most similar items, where
 *
 *     similarity(i, j) = common_users(i, j) / sqrt(|users(i)| * |users(j)|)
 *
 * Items are located through an open-addressing (linear probing) hash table
 * that maps an item id string to its index in the `item` array.
 *
 * NOTE(review): this listing was reconstructed from a garbled copy in which
 * several literal values were lost. Every value tagged "assumed" below is a
 * placeholder chosen to be self-consistent; confirm against the original.
 * File names are kept exactly as they appeared in the garbled copy; their
 * letter case may also have been altered -- confirm on case-sensitive
 * file systems.
 */
#include <math.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define MAXSTRING        100                 /* assumed: max item id length incl. NUL */
#define MAXUSERSIZE      (1024 * 1024 * 10)  /* max bytes in one input line */
#define SIMILARITY_ITEM  20                  /* assumed: neighbours kept per item */
#define MAXLEN           1024                /* assumed: copy-buffer size used by FileJoin */
#define NUM_OUTPUT_FILES 20                  /* one entry per name in filename[] */
#define ITEM_GROW        1000                /* assumed: initial size / growth step of item[] */
#define USER_GROW        50                  /* assumed: initial size / growth step of a user list */
#define MIN_COMMON_USERS 5                   /* a pair is scored only if common > this */

/* NOTE(review): the separator character was lost in the garbled copy; both
 * space and tab are accepted here -- confirm against the real data file. */
#define FIELD_SEPARATORS " \t"

/* Per-thread output files; thread k writes filename[k]. */
char filename[NUM_OUTPUT_FILES][32] = {
    "Output1.txt",  "Output2.txt",  "Output3.txt",  "Output4.txt",
    "Output5.txt",  "Output6.txt",  "Output7.txt",  "Output8.txt",
    "Output9.txt",  "Output10.txt", "Output11.txt", "Output12.txt",
    "Output13.txt", "Output14.txt", "Output15.txt", "Output16.txt",
    "Output17.txt", "Output18.txt", "Output19.txt", "Output20.txt"
};

/* Number of slots in the item-id hash table. */
const int item_hash_size = 30000000;

struct ItemInfo {
    char itemid[MAXSTRING];          /* item id string */
    int totaluser;                   /* number of entries used in userlist */
    unsigned long long *userlist;    /* ids of the users who bought this item */
    int max_user;                    /* allocated capacity of userlist */
};

unsigned long long max_item = ITEM_GROW;  /* allocated capacity of item[] */
unsigned long long item_size = 0;         /* number of items stored in item[] */
struct ItemInfo *item;                    /* all known items */
int *item_hash;                           /* slot -> index into item[], or -1 if empty */
int num_threads = NUM_OUTPUT_FILES;       /* assumed: one worker per output file */

/* Print a message to stderr and terminate the process. */
static void die(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(1);
}

/* qsort comparator for unsigned long long, ascending. */
static int compare_user_ids(const void *lhs, const void *rhs)
{
    unsigned long long x = *(const unsigned long long *)lhs;
    unsigned long long y = *(const unsigned long long *)rhs;
    return (x > y) - (x < y);
}

/*
 * Hash an item id string to a slot in [0, item_hash_size).
 * Uses a base-257 polynomial over the bytes of the string.
 */
int GetWordHash(const char *itemid)
{
    unsigned long long hash = 0;

    /* Bytes are read as unsigned so the result does not depend on the
     * platform's char signedness. */
    for (const unsigned char *p = (const unsigned char *)itemid; *p != '\0'; p++)
        hash = hash * 257 + *p;
    return (int)(hash % (unsigned long long)item_hash_size);
}

/*
 * Look up an item id.
 * Returns its index in item[], or -1 when the id has not been added.
 */
int SearchItem(const char *itemid)
{
    unsigned int slot = (unsigned int)GetWordHash(itemid);

    /* Linear probing: an empty slot ends the probe sequence. */
    while (item_hash[slot] != -1) {
        int index = item_hash[slot];
        if (strcmp(itemid, item[index].itemid) == 0)
            return index;
        slot = (slot + 1) % (unsigned int)item_hash_size;
    }
    return -1;
}

/*
 * Append a new item whose first buyer is `userid`, and register it in the
 * hash table. The id is truncated to MAXSTRING - 1 characters.
 * Returns the index of the new item in item[]. Exits on allocation failure.
 */
int AddItemIdToItem(const char *itemid, unsigned long long userid)
{
    struct ItemInfo *slot_item = &item[item_size];

    slot_item->max_user = USER_GROW;
    slot_item->userlist = calloc((size_t)slot_item->max_user,
                                 sizeof *slot_item->userlist);
    if (slot_item->userlist == NULL)
        die("User list allocate failed.");
    snprintf(slot_item->itemid, sizeof slot_item->itemid, "%s", itemid);
    slot_item->userlist[0] = userid;
    slot_item->totaluser = 1;
    item_size++;

    /* Grow item[] ahead of need so the next append always has room. */
    if (item_size + 2 >= max_item) {
        unsigned long long new_capacity = max_item + ITEM_GROW;
        struct ItemInfo *grown = realloc(item, (size_t)new_capacity * sizeof *grown);
        if (grown == NULL)
            die("Item reallocate failed.");
        item = grown;
        max_item = new_capacity;
    }

    unsigned int slot = (unsigned int)GetWordHash(itemid);
    while (item_hash[slot] != -1)
        slot = (slot + 1) % (unsigned int)item_hash_size;
    item_hash[slot] = (int)(item_size - 1);
    return (int)(item_size - 1);
}

/*
 * Parse the numeric part of a user-id field: the first character (a letter
 * prefix in the input) is skipped and the decimal digits after it are
 * accumulated. Parsing stops at the first non-digit.
 */
static unsigned long long parse_user_id(const char *field, size_t len)
{
    unsigned long long value = 0;

    for (size_t j = 1; j < len; j++) {
        if (field[j] < '0' || field[j] > '9')
            break;
        value = value * 10 + (unsigned long long)(field[j] - '0');
    }
    return value;
}

/* Record that `userid` bought the item named by field[0..len). */
static void record_purchase(const char *field, size_t len, unsigned long long userid)
{
    char id[MAXSTRING];
    size_t n = len < sizeof id - 1 ? len : sizeof id - 1;

    /* Truncate before lookup so lookup and storage agree on the key. */
    memcpy(id, field, n);
    id[n] = '\0';

    int index = SearchItem(id);
    if (index == -1) {
        AddItemIdToItem(id, userid);
        return;
    }

    struct ItemInfo *it = &item[index];
    it->totaluser++;
    if (it->totaluser + 2 >= it->max_user) {
        int new_capacity = it->max_user + USER_GROW;
        unsigned long long *grown =
            realloc(it->userlist, (size_t)new_capacity * sizeof *grown);
        if (grown == NULL)
            die("User list reallocate failed.");
        it->userlist = grown;
        it->max_user = new_capacity;
    }
    it->userlist[it->totaluser - 1] = userid;
}

/*
 * Read every purchase record from "Data_1w.txt" and populate item[].
 * Each line is: <user id> <item id> <item id> ...
 * After loading, every item's user list is sorted ascending because
 * CalItemSim looks users up with binary_search.
 * Returns 0; exits the process if the file cannot be opened or memory
 * runs out.
 */
int ReadItemInfo(void)
{
    FILE *fin = fopen("Data_1w.txt", "rb");
    if (fin == NULL) {
        printf("The input file doesn't exist.\n");
        exit(1);
    }

    char *line = malloc(MAXUSERSIZE);
    if (line == NULL) {
        fclose(fin);
        die("Line buffer allocate failed.");
    }

    while (fgets(line, MAXUSERSIZE, fin) != NULL) {
        /* Drop the line ending; the file is opened in binary mode, so a
         * CR from CRLF input would otherwise stick to the last item id. */
        line[strcspn(line, "\r\n")] = '\0';

        unsigned long long userid = 0;
        int field_no = 0;
        char *cursor = line;

        for (;;) {
            cursor += strspn(cursor, FIELD_SEPARATORS);
            if (*cursor == '\0')
                break;
            size_t len = strcspn(cursor, FIELD_SEPARATORS);
            if (field_no == 0)
                userid = parse_user_id(cursor, len);
            else
                record_purchase(cursor, len, userid);
            field_no++;
            cursor += len;
        }
    }

    free(line);
    fclose(fin);

    for (unsigned long long i = 0; i < item_size; i++)
        qsort(item[i].userlist, (size_t)item[i].totaluser,
              sizeof *item[i].userlist, compare_user_ids);
    return 0;
}

/* Allocate the hash table (all slots empty) and the initial item array. */
void Init(void)
{
    item_hash = malloc((size_t)item_hash_size * sizeof *item_hash);
    if (item_hash == NULL)
        die("Hash table allocate failed.");
    for (int i = 0; i < item_hash_size; ++i)
        item_hash[i] = -1;

    item = malloc((size_t)max_item * sizeof *item);
    if (item == NULL) {
        printf("Item allocate failed.\n");
        exit(1);
    }
}

/*
 * Binary search for `value` in array[0..n), which must be sorted ascending.
 * Returns the index of a matching element, or -1 if there is none.
 */
int binary_search(unsigned long long array[], int n, unsigned long long value)
{
    int left = 0;
    int right = n - 1;

    while (left <= right) {
        /* left + half-distance avoids overflow of (left + right). */
        int middle = left + ((right - left) >> 1);
        if (array[middle] > value)
            right = middle - 1;
        else if (array[middle] < value)
            left = middle + 1;
        else
            return middle;
    }
    return -1;
}

/*
 * Thread entry point. `a` carries the worker index (0 .. num_threads-1)
 * packed into the pointer value. The worker scores its contiguous slice of
 * item[] against every other item and writes, per item, up to
 * SIMILARITY_ITEM lines "<item>\t<neighbour>\t<similarity>" to filename[a],
 * best first.
 *
 * NOTE(review): the separators inside the output format string were lost in
 * the garbled copy; tabs are assumed.
 */
void *CalItemSim(void *a)
{
    int tid = (int)(intptr_t)a;
    FILE *fout = fopen(filename[tid], "w");
    if (fout == NULL) {
        perror(filename[tid]);
        return NULL;
    }

    float bestsim[SIMILARITY_ITEM];
    char bestitemid[SIMILARITY_ITEM][MAXSTRING];

    int total = (int)item_size;
    int chunk = total / num_threads;
    int left = chunk * tid;
    int right = chunk * (tid + 1) - 1;
    if (tid == num_threads - 1)
        right = total - 1;   /* the last worker also takes the remainder */

    for (int i = left; i <= right; ++i) {
        for (int w = 0; w < SIMILARITY_ITEM; ++w) {
            bestsim[w] = -1;
            bestitemid[w][0] = 0;
        }

        for (int j = 0; j < total; ++j) {
            if (i == j)
                continue;

            /* Count the users who bought both item i and item j. */
            int common = 0;
            for (int t = 0; t < item[i].totaluser; ++t) {
                if (binary_search(item[j].userlist, item[j].totaluser,
                                  item[i].userlist[t]) != -1)
                    common++;
            }
            if (common <= MIN_COMMON_USERS)
                continue;

            /* Multiply in double so large user counts cannot overflow int. */
            float p = (float)sqrt((double)item[i].totaluser *
                                  (double)item[j].totaluser);
            float similarity = common / p;

            /* Insert into the descending top-K list, shifting the tail. */
            for (int k = 0; k < SIMILARITY_ITEM; ++k) {
                if (similarity > bestsim[k]) {
                    for (int q = SIMILARITY_ITEM - 1; q > k; q--) {
                        bestsim[q] = bestsim[q - 1];
                        strcpy(bestitemid[q], bestitemid[q - 1]);
                    }
                    bestsim[k] = similarity;
                    strcpy(bestitemid[k], item[j].itemid);
                    break;
                }
            }
        }

        /* Unused slots still hold the -1 sentinel and end the list. */
        for (int c = 0; (c < SIMILARITY_ITEM) && bestsim[c] != -1; ++c)
            fprintf(fout, "%s\t%s\t%f\n", item[i].itemid, bestitemid[c], bestsim[c]);
    }

    fclose(fout);
    return NULL;
}

/* Start num_threads CalItemSim workers and wait for all of them to finish. */
void CreatMulThread(void)
{
    /* Each worker indexes filename[] by its thread number. */
    if (num_threads < 1 || num_threads > NUM_OUTPUT_FILES)
        die("num_threads must be between 1 and NUM_OUTPUT_FILES.");

    pthread_t *pt = malloc((size_t)num_threads * sizeof *pt);
    if (pt == NULL)
        die("Thread array allocate failed.");

    for (intptr_t a = 0; a < num_threads; a++) {
        int rc = pthread_create(&pt[a], NULL, CalItemSim, (void *)a);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
            exit(1);
        }
    }
    for (int a = 0; a < num_threads; a++)
        pthread_join(pt[a], NULL);

    free(pt);
}

/*
 * Concatenate the per-thread output files into "Output.txt", in thread
 * order, deleting each part after it has been copied. A part that cannot
 * be opened is reported and skipped.
 */
void FileJoin(void)
{
    char buffer[MAXLEN];
    FILE *fout = fopen("Output.txt", "w");
    if (fout == NULL) {
        perror("Output.txt");
        exit(1);
    }

    for (int i = 0; i < num_threads; ++i) {
        FILE *fin = fopen(filename[i], "r");
        if (fin == NULL) {
            perror(filename[i]);
            continue;
        }
        while (fgets(buffer, MAXLEN, fin) != NULL)
            fprintf(fout, "%s", buffer);
        fclose(fin);

        if (remove(filename[i]) == 0)
            printf("Removed %s\n", filename[i]);
        else
            perror("remove");
    }

    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(fout) != 0)
        perror("Output.txt");
}

int main(void)
{
    Init();
    ReadItemInfo();
    CreatMulThread();
    FileJoin();
    return 0;
}

C-language implementation of the multithreaded version of ITEM_KNN

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.