Apriori algorithm: a C language implementation

Source: Internet
Author: User

Original address: http://blog.csdn.net/liema2000/article/details/6118423

#include <stdio.h>
// One transaction of database D.
typedef struct
{
    int item[100]; // data items; item[0] holds the number of items stored in item[1..]
} D_node;

// One entry of a candidate set.
typedef struct
{
    int item[100]; // data items in item[1..]; item[0] saves the support count
} C_node;

// One entry of a frequent set.
// The type is spelled L_node because that is the spelling the table
// declaration of l uses.
typedef struct
{
    int item[100]; // data items in item[1..]; item[0] saves the support count
} L_node;

// Candidate sets: c[n][k] is the k-th candidate n-itemset;
// c[n][0].item[0] holds the number of candidates at level n.
C_node c[100][100];
// Frequent sets: l[n][k] is the k-th frequent n-itemset;
// l[n][0].item[0] holds the number of frequent itemsets at level n.
L_node l[100][100];
// Database: d[1..] are the transactions; d[0].item[0] holds their number.
D_node d[100];

int min_supp; // minimum support count an itemset needs to be frequent

/*
 * InPut - read the minimum support and the transaction database from stdin.
 *
 * Fills the global database d: d[0].item[0] receives the number of
 * transactions, d[i].item[0] the number of items of transaction i and
 * d[i].item[1..] the items themselves. The global min_supp receives the
 * minimum support count.
 *
 * Reading stops early on malformed input or on a count that does not fit
 * the fixed-size arrays; only the data read successfully up to that point
 * is kept, so later stages never index outside d.
 */
void InPut(void)
{
    // Slot 0 of each array is used as a header, so one entry less is usable.
    const int max_trans = (int)(sizeof d / sizeof d[0]) - 1;
    const int max_items = (int)(sizeof d[0].item / sizeof d[0].item[0]) - 1;
    int i, j;
    int n = 0;  // size of the transaction set
    int n1 = 0; // number of records in the current transaction

    d[0].item[0] = 0; // database stays empty unless the header is read

    printf("Please enter minimum support:");
    if (scanf("%d", &min_supp) != 1)
        return;

    printf("Enter the size of the transaction set");
    if (scanf("%d", &n) != 1 || n < 0 || n > max_trans)
        return;
    d[0].item[0] = n;

    for (i = 1; i <= n; i++)
    {
        printf("Enter the number of records in transaction [%d] (n)", i);
        if (scanf("%d", &n1) != 1 || n1 < 0 || n1 > max_items)
        {
            d[0].item[0] = i - 1; // keep only the complete transactions
            return;
        }
        d[i].item[0] = n1;

        for (j = 1; j <= n1; j++)
        {
            printf("Enter transaction [%d] in the record entry, enter the number directly:", i);
            if (scanf("%d", &d[i].item[j]) != 1)
            {
                d[i].item[0] = j - 1; // keep the items read so far
                d[0].item[0] = i;
                return;
            }
        }
    }
}

/*
 * C1 - scan database D and build the candidate 1-itemsets.
 *
 * Input:  the global database d (d[0].item[0] transactions, each with
 *         d[i].item[0] items in d[i].item[1..]).
 * Output: c[1][k].item[1] is the k-th distinct item, c[1][k].item[0] the
 *         number of times it occurs in d, and c[1][0].item[0] the number
 *         of distinct items.
 *
 * An empty database (or empty transactions) yields zero candidates.
 * Distinct items beyond the capacity of c[1] are ignored instead of being
 * written past the end of the table.
 *
 * NOTE(review): an item repeated inside one transaction is counted once
 * per occurrence here; confirm whether per-transaction counting is wanted.
 */
void C1(void)
{
    // Slot 0 is the header entry, so one entry less is usable.
    const int max_sets = (int)(sizeof c[1] / sizeof c[1][0]) - 1;
    int i, j, k;
    int no = 0; // number of distinct items found so far
    int temp;   // 1 while the current item has not been matched

    c[1][0].item[0] = 0;

    for (i = 1; i <= d[0].item[0]; i++) // every transaction
    {
        for (j = 1; j <= d[i].item[0]; j++) // every item of the transaction
        {
            temp = 1;
            for (k = 1; k <= no; k++)
            {
                if (c[1][k].item[1] == d[i].item[j])
                {
                    c[1][k].item[0]++; // support plus 1
                    temp = 0;
                    break; // entries of c[1] are distinct, no second match
                }
            }

            if (temp && no < max_sets) // generate a new itemset
            {
                no++;
                c[1][no].item[1] = d[i].item[j];
                c[1][no].item[0] = 1;
            }
        }
    }

    c[1][0].item[0] = no; // number of distinct data items
}

/*
 * Cn - build the candidate n-itemsets from the frequent (n-1)-itemsets.
 *
 * Two entries of l[n-1] are joined when their first n-2 items agree; the
 * new candidate takes all n-1 items of the first entry plus the last item
 * of the second. The support of each candidate is then counted by
 * scanning every transaction of d.
 *
 * Input:  n, the level to generate; l[n-1] and d must already be filled.
 * Output: c[n][1..] with items in item[1..n] and support in item[0];
 *         c[n][0].item[0] is the number of candidates.
 *
 * Levels outside the tables are ignored. Generation stops once c[n] is
 * full rather than writing past its end.
 */
void Cn(int n)
{
    // Slot 0 is the header entry, so one entry less is usable.
    const int max_level = (int)(sizeof c / sizeof c[0]) - 1;
    const int max_sets = (int)(sizeof c[0] / sizeof c[0][0]) - 1;
    const int max_items = (int)(sizeof c[0][0].item / sizeof c[0][0].item[0]) - 1;
    int i, j, k, p, q, s, t, num;
    int no = 0; // number of candidates generated so far
    int temp;   // 1 while the pair (i, j) still meets the join condition
    int count;  // items of the candidate found in the current transaction

    if (n < 1 || n > max_level || n > max_items)
        return;

    c[n][0].item[0] = 0;       // initialization
    num = l[n-1][0].item[0];   // number of itemsets in L(n-1)

    for (i = 1; i <= num; i++)
    {
        for (j = i + 1; j <= num; j++)
        {
            // Items at the same positions must agree for the join.
            // For n <= 2 there is no common prefix and the loop is skipped.
            temp = 1;
            for (k = 1; k < n - 1; k++)
            {
                if (l[n-1][i].item[k] != l[n-1][j].item[k])
                {
                    temp = 0;
                    break;
                }
            }
            if (!temp)
                continue;

            if (no >= max_sets)
                return; // table full; c[n][0].item[0] already equals no

            no++;
            for (p = 1; p <= n - 1; p++)
                c[n][no].item[p] = l[n-1][i].item[p];
            c[n][no].item[n] = l[n-1][j].item[n-1];
            c[n][no].item[0] = 0;

            // Count the transactions that contain every item of the candidate.
            for (q = 1; q <= d[0].item[0]; q++)
            {
                count = 0;
                for (s = 1; s <= n; s++)
                {
                    for (t = 1; t <= d[q].item[0]; t++)
                    {
                        if (c[n][no].item[s] == d[q].item[t])
                        {
                            count += 1;
                            break;
                        }
                    }
                }
                if (count == n)
                    c[n][no].item[0] += 1; // whole candidate present
            }

            c[n][0].item[0] = no;
        }
    }
}

/*
 * Ln - select the frequent n-itemsets from the candidate set c[n].
 *
 * Every candidate whose support (item[0]) is at least min_supp is copied
 * to l[n], items and support alike. l[n][0].item[0] receives the number
 * of itemsets kept. Levels outside the tables are ignored.
 */
void Ln(int n)
{
    const int max_level = (int)(sizeof l / sizeof l[0]) - 1;
    int i, k;
    int j = 0; // number of frequent itemsets kept so far

    if (n < 0 || n > max_level)
        return;

    l[n][0].item[0] = 0;
    for (i = 1; i <= c[n][0].item[0]; i++)
    {
        if (c[n][i].item[0] >= min_supp)
        {
            j += 1;
            for (k = 1; k <= n; k++)
                l[n][j].item[k] = c[n][i].item[k];
            l[n][j].item[0] = c[n][i].item[0];
        }
    }

    l[n][0].item[0] = j; // number of saved itemsets
}

/*
 * OutPut - print the frequent n-itemsets stored in l[n] to stdout.
 *
 * Each itemset is printed as its n items followed by its support; a
 * notice is printed instead when l[n] holds no itemsets. Levels outside
 * the table are ignored.
 */
void OutPut(int n)
{
    const int max_level = (int)(sizeof l / sizeof l[0]) - 1;
    int i, j, k;

    if (n < 0 || n > max_level)
        return;

    printf("Frequent itemsets l%d as follows: \n", n);
    k = l[n][0].item[0]; // number of itemsets at this level
    if (k != 0)
    {
        for (i = 1; i <= k; i++)
        {
            printf("{");
            for (j = 1; j <= n; j++)
                printf("i%d", l[n][i].item[j]);
            printf("} Support degree:%d\n", l[n][i].item[0]);
        }
    }
    else
    {
        printf("Project set is empty \n");
    }
}

/*
 * main - run Apriori: read the database, then alternate candidate
 * generation (Cn) and frequent-set selection (Ln) level by level until a
 * level yields no frequent itemsets, and finally print every level
 * (the last, empty one included).
 */
int main(void)
{
    // Highest level the c and l tables can hold.
    const int max_level = (int)(sizeof l / sizeof l[0]) - 1;
    int i;
    int n = 1;

    InPut();
    C1();  // initialize: generate the candidate 1-itemsets C1
    Ln(1); // get the frequent 1-itemsets L1

    while (l[n][0].item[0] != 0 && n < max_level)
    {
        n += 1;
        Cn(n);
        Ln(n);
    }

    for (i = 1; i <= n; i++)
        OutPut(i);

    return 0;
}

Test case (the point I am unsure about):

The record for transaction 4 is 23, 34. In L2, {12, 23} occurs 2 times and {23, 34} occurs 3 times. In this situation no frequent 3-itemset is generated.

If {12, 23, 34} is frequent, then its subsets {12, 23} and {12, 34} are frequent as well. So two itemsets are joined only when the items in the same leading positions are identical.

Apriori algorithm-C language implementation

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.