// Original article: http://blog.csdn.net/liema2000/article/details/6118423
#include <stdio.h>
/*
 * One row of the transaction database D.
 *
 * As used by the rest of this file: d[0].item[0] holds the number of
 * transactions; for transaction i (i >= 1), d[i].item[0] holds its number
 * of items and d[i].item[1..] hold the items themselves.
 */
typedef struct
{
    int item[100]; /* data items */
} D_node;
/*
 * One candidate itemset.
 *
 * item[0] stores the support count; item[1..n] store the n items of the
 * itemset.
 */
typedef struct
{
    int item[100]; /* data items; item[0] saves the support count */
} C_node;
/*
 * One frequent itemset.
 *
 * item[0] stores the support count; item[1..n] store the n items of the
 * itemset. The type is named L_node to match its use in the declaration
 * of the global table l.
 */
typedef struct
{
    int item[100]; /* data items; item[0] saves the support count */
} L_node;
/*
 * Global working tables (index 0 of each dimension is used for counts,
 * real entries start at index 1):
 *   c[n][k] - k-th candidate n-itemset; c[n][0].item[0] = number of candidates
 *   l[n][k] - k-th frequent n-itemset;  l[n][0].item[0] = number of frequent sets
 *   d[i]    - i-th transaction;          d[0].item[0]    = number of transactions
 */
C_node c[100][100];
L_node l[100][100];
D_node d[100];
int min_supp; /* minimum support count an itemset needs to be frequent */
/*
 * Read the minimum support and the transaction database from stdin.
 *
 * Fills the globals min_supp, d[0].item[0] (number of transactions),
 * d[i].item[0] (number of items in transaction i) and d[i].item[1..]
 * (the items of transaction i).
 *
 * Every scanf result is checked and every count is validated against the
 * capacity of d before it is used as an index. On malformed or
 * out-of-range input an error is written to stderr and the database is
 * left empty (d[0].item[0] == 0).
 */
void InPut(void)
{
    /* Index 0 is reserved for counts, so usable capacity is size - 1. */
    const int max_trans = (int)(sizeof d / sizeof d[0]) - 1;
    const int max_items = (int)(sizeof d[0].item / sizeof d[0].item[0]) - 1;
    int i, j;
    int n;  /* size of the transaction set */
    int n1; /* number of records in the current transaction */

    d[0].item[0] = 0; /* stays empty unless all input parses */

    printf("Please enter minimum support:");
    if (scanf("%d", &min_supp) != 1) {
        fprintf(stderr, "InPut: invalid minimum support\n");
        return;
    }

    printf("Enter the size of the transaction set");
    if (scanf("%d", &n) != 1 || n < 0 || n > max_trans) {
        fprintf(stderr, "InPut: transaction count must be in 0..%d\n", max_trans);
        return;
    }

    for (i = 1; i <= n; i++) {
        printf("Enter the number of records in transaction [%d] (n)", i);
        if (scanf("%d", &n1) != 1 || n1 < 0 || n1 > max_items) {
            fprintf(stderr, "InPut: record count must be in 0..%d\n", max_items);
            return;
        }
        d[i].item[0] = n1;

        for (j = 1; j <= n1; j++) {
            printf("Enter transaction [%d] in the record entry, enter the number directly:", i);
            if (scanf("%d", &d[i].item[j]) != 1) {
                fprintf(stderr, "InPut: invalid record in transaction %d\n", i);
                return;
            }
        }
    }

    /* Publish the transaction count only after everything was read. */
    d[0].item[0] = n;
} /* end of InPut */
/*
 * Scan database D and build the candidate 1-itemsets C1.
 *
 * Input:  global d (d[0].item[0] transactions; d[i].item[0] items each).
 * Output: global c[1]:
 *           c[1][k].item[1] - the k-th distinct item, in first-seen order
 *           c[1][k].item[0] - number of occurrences of that item in D
 *           c[1][0].item[0] - number of distinct items
 *
 * The distinct-item counter starts at 0, so an empty database (or an
 * empty first transaction) yields no phantom candidate. Distinct items
 * beyond the capacity of c[1] are dropped and reported on stderr.
 */
void C1(void)
{
    /* Index 0 is reserved for the count, so usable capacity is size - 1. */
    const int max_cand = (int)(sizeof c[1] / sizeof c[1][0]) - 1;
    int i, j, k;
    int no = 0;      /* number of distinct items found so far */
    int dropped = 0; /* item occurrences skipped because c[1] was full */
    int is_new;

    for (i = 1; i <= d[0].item[0]; i++) {          /* each transaction */
        for (j = 1; j <= d[i].item[0]; j++) {      /* each item in it */
            is_new = 1;
            for (k = 1; k <= no; k++) {
                if (c[1][k].item[1] == d[i].item[j]) {
                    c[1][k].item[0]++; /* support plus 1 */
                    is_new = 0;
                    break; /* candidates are distinct: at most one match */
                }
            }
            if (!is_new)
                continue;

            if (no >= max_cand) {
                dropped++;
                continue;
            }

            /* First sighting of this item: open a new candidate. */
            no++;
            c[1][no].item[1] = d[i].item[j];
            c[1][no].item[0] = 1;
        }
    }

    if (dropped > 0)
        fprintf(stderr, "C1: candidate table full, %d item occurrence(s) ignored\n", dropped);

    c[1][0].item[0] = no; /* number of distinct data items */
} /* end of C1 */
/*
 * Build the candidate n-itemsets C(n) by joining the frequent
 * (n-1)-itemsets L(n-1), and count each candidate's support in D.
 *
 * Two members of L(n-1) are joined when their first n-2 items are equal
 * position by position (for n == 2 every pair is joined). The candidate
 * consists of all n-1 items of the first set followed by the last item of
 * the second set. No subset-based pruning is done; every candidate's
 * support is counted directly against the database.
 *
 * Input:  n (itemset size, >= 2), globals l[n-1] and d.
 * Output: global c[n]: c[n][k].item[1..n] items, c[n][k].item[0] support,
 *         c[n][0].item[0] number of candidates.
 *
 * An out-of-range n is ignored. If more candidates are produced than c[n]
 * can hold, generation stops and a message is written to stderr.
 */
void Cn(int n)
{
    const int levels = (int)(sizeof c / sizeof c[0]);
    /* Index 0 is reserved for counts, so usable capacity is size - 1. */
    const int max_cand = (int)(sizeof c[0] / sizeof c[0][0]) - 1;
    const int max_len = (int)(sizeof c[0][0].item / sizeof c[0][0].item[0]) - 1;
    int i, j, k, p, q, s, t;
    int num;      /* number of itemsets in L(n-1) */
    int no = 0;   /* number of candidates generated so far */
    int joinable; /* whether the current pair meets the join condition */
    int count;    /* candidate items found in the current transaction */

    if (n < 2 || n >= levels || n > max_len)
        return;

    c[n][0].item[0] = 0; /* initialization */
    num = l[n - 1][0].item[0];

    for (i = 1; i <= num; i++) {
        for (j = i + 1; j <= num; j++) {
            /* Join condition: identical items in positions 1..n-2. */
            joinable = 1;
            for (k = 1; k < n - 1; k++) {
                if (l[n - 1][i].item[k] != l[n - 1][j].item[k]) {
                    joinable = 0;
                    break;
                }
            }
            if (!joinable)
                continue;

            if (no >= max_cand) {
                fprintf(stderr, "Cn: candidate table for size %d is full, stopping\n", n);
                return;
            }

            /* Candidate = all of set i plus the last item of set j. */
            no++;
            for (p = 1; p <= n - 1; p++)
                c[n][no].item[p] = l[n - 1][i].item[p];
            c[n][no].item[n] = l[n - 1][j].item[n - 1];
            c[n][no].item[0] = 0;

            /* Support = number of transactions containing all n items. */
            for (q = 1; q <= d[0].item[0]; q++) {
                count = 0;
                /* Iterate exactly n items rather than up to a 0 sentinel,
                   so an item with value 0 is handled correctly. */
                for (s = 1; s <= n; s++) {
                    for (t = 1; t <= d[q].item[0]; t++) {
                        if (c[n][no].item[s] == d[q].item[t]) {
                            count += 1;
                            break;
                        }
                    }
                }
                if (count == n)
                    c[n][no].item[0] += 1;
            }

            c[n][0].item[0] = no;
        }
    }
} /* end of Cn */
/*
 * Build the frequent n-itemsets L(n) from the candidates C(n).
 *
 * Every candidate c[n][i] whose support (item[0]) is at least min_supp is
 * copied, items and support, into the next free slot of l[n]. The number
 * of frequent itemsets is stored in l[n][0].item[0].
 *
 * An n outside the bounds of the tables is ignored.
 */
void Ln(int n)
{
    const int levels = (int)(sizeof l / sizeof l[0]);
    int i, k;
    int j = 0; /* number of frequent itemsets kept so far */

    if (n < 1 || n >= levels)
        return;

    l[n][0].item[0] = 0;
    for (i = 1; i <= c[n][0].item[0]; i++) {
        if (c[n][i].item[0] >= min_supp) {
            j += 1;
            for (k = 1; k <= n; k++)
                l[n][j].item[k] = c[n][i].item[k];
            l[n][j].item[0] = c[n][i].item[0];
        }
    }
    l[n][0].item[0] = j; /* number of saved itemsets */
} /* end of Ln */
/*
 * Print the frequent n-itemsets L(n) to stdout.
 *
 * Each itemset is printed on its own line as "{i<a>i<b>...}" followed by
 * its support count. If l[n] holds no itemsets, a message saying the set
 * is empty is printed instead.
 */
void OutPut(int n)
{
    int i, j;
    int k; /* number of frequent n-itemsets */

    printf("Frequent itemsets l%d as follows: \n", n);
    k = l[n][0].item[0];
    if (k != 0) {
        for (i = 1; i <= k; i++) {
            printf("{");
            for (j = 1; j <= n; j++)
                printf("i%d", l[n][i].item[j]);
            printf("} Support degree:%d\n", l[n][i].item[0]);
        }
    } else {
        printf("Project set is empty \n");
    }
}
/*
 * Program entry point: run Apriori on a database read from stdin.
 *
 * Reads the input, builds C1 and L1, then alternates Cn/Ln for growing n
 * until a level yields no frequent itemsets (or the tables' last level is
 * reached). Finally prints every level from 1 to n, including the last,
 * possibly empty, one.
 */
int main(void)
{
    const int levels = (int)(sizeof l / sizeof l[0]);
    int i;
    int n = 1;

    InPut();
    C1();  /* initialize: generate candidate 1-itemsets C1 */
    Ln(1); /* get frequent 1-itemsets L1 */

    /* Stop before n would index past the last row of c and l. */
    while (l[n][0].item[0] != 0 && n + 1 < levels) {
        n += 1;
        Cn(n);
        Ln(n);
    }

    for (i = 1; i <= n; i++)
        OutPut(i);

    return 0;
}
/*
 * Test case (the point I was unsure about):
 * The record for transaction 4 is 23, 34. In L2, {12, 23} appears 2 times
 * and {23, 34} appears 3 times. This situation does not generate a
 * frequent 3-itemset.
 * If {12, 23, 34} is frequent, then {12, 23} and {12, 34} must both be
 * frequent. So two itemsets are joined only when they have the same items
 * in the same leading positions.
 */
// Apriori algorithm - C language implementation