For details of the algorithm, see the paper "Fast Algorithms for Mining Association Rules" (Agrawal and Srikant, 1994), which introduces the Apriori algorithm.
The graphical (GUI) version of the project, together with the test cases, can be downloaded here: http://download.csdn.net/detail/michealtx/4266155
The C++ code for the console version is as follows:
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// Console implementation of the Apriori frequent-itemset mining algorithm.
//
// Terminology used throughout:
//   transaction     - one record of the database, stored as a set<int> of items
//   k-itemset       - a set containing exactly k items
//   support         - number of transactions that contain a given itemset
//   large itemset   - an itemset whose support is >= the minimum support
//   L[k]            - the list of all large k-itemsets

// Reads the transaction database from a text file.
//
// Each line of the file is one transaction; items are integers separated by
// whitespace. Parsing of a line stops at the first token that is not an
// integer. Every line read (even a blank one) is appended to `database` as one
// transaction, and the number of lines read is printed to stdout.
//
// Returns false (after printing a message) if the file cannot be opened,
// true otherwise.
bool obtaindatabase(vector<set<int> >& database, const char* filename) {
    if (filename == 0) {
        cout << "file opening failed!" << endl;
        return false;
    }

    ifstream in(filename);
    if (!in) {
        cout << "file opening failed!" << endl;
        return false;
    }

    string line;
    unsigned int rows = 0;
    while (getline(in, line)) {
        ++rows;

        // Stream extraction skips any run of whitespace and also picks up the
        // last number on a line that has no trailing separator.
        set<int> transaction;
        istringstream fields(line);
        int item = 0;
        while (fields >> item) {
            transaction.insert(item);
        }
        database.push_back(transaction);
    }

    cout << rows << endl;
    return true;
}

// Scans the database once and builds L[1].
//
// Every item whose support is >= `minsupport` is appended to `largeitemset`
// as a one-element set, and recorded in `lm1` as <itemset, support>.
// Items are emitted in ascending order.
void createitemset(const vector<set<int> >& database,
                   vector<set<int> >& largeitemset,
                   unsigned int minsupport,
                   map<set<int>, int>& lm1) {
    // Dictionary <item, number of transactions containing it>.
    map<int, unsigned int> counts;
    for (vector<set<int> >::const_iterator transaction = database.begin();
         transaction != database.end(); ++transaction) {
        for (set<int>::const_iterator item = transaction->begin();
             item != transaction->end(); ++item) {
            ++counts[*item];
        }
    }

    for (map<int, unsigned int>::const_iterator entry = counts.begin();
         entry != counts.end(); ++entry) {
        if (entry->second >= minsupport) {
            set<int> large;
            large.insert(entry->first);
            largeitemset.push_back(large);
            lm1.insert(make_pair(large, static_cast<int>(entry->second)));
        }
    }
}

// Prints the large i-itemsets in `largeitemset` to stdout, four per row.
void outputlargeitemset(const vector<set<int> >& largeitemset, unsigned int i) {
    cout << "include" << largeitemset.size() << "item" << i << "-item set:" << endl;

    unsigned int printed = 0;
    for (vector<set<int> >::const_iterator itemset = largeitemset.begin();
         itemset != largeitemset.end(); ++itemset) {
        cout << "{";
        for (set<int>::const_iterator item = itemset->begin();
             item != itemset->end(); ++item) {
            cout << *item << " ";
        }
        cout << "} ";

        ++printed;
        if (printed % 4 == 0) {
            cout << endl;
        }
    }
    cout << endl;
}

// Join step of candidate generation.
//
// Two (k-1)-itemsets can be joined when they have the same size, agree on
// their first k-2 items (in ascending order) and differ in their last item.
// On success `temp` is overwritten with their union (a k-itemset) and true is
// returned. On failure `temp` is left untouched and false is returned.
// Empty itemsets can never be joined.
bool joint(const set<int>& recordi, const set<int>& recordj, set<int>& temp) {
    if (recordi.size() != recordj.size() || recordi.empty()) {
        return false;
    }

    set<int>::const_iterator it1 = recordi.begin();
    set<int>::const_iterator it2 = recordj.begin();
    const set<int>::size_type prefix = recordi.size() - 1;
    for (set<int>::size_type n = 0; n < prefix; ++n, ++it1, ++it2) {
        if (*it1 != *it2) {
            return false;
        }
    }

    // Both iterators now point at the last item of their set.
    if (*it1 == *it2) {
        return false;
    }

    temp = recordi;
    temp.insert(*it2);
    return true;
}

// Prune step of candidate generation.
//
// Returns true when the candidate `temp` should be kept, i.e. when every
// subset obtained by removing exactly one item from `temp` is present in
// `largetemp` (L[k-1]). Returns false as soon as one such subset is missing,
// because a superset of an itemset that is not large cannot be large either.
// An empty `temp` is never kept.
bool prune(const set<int>& temp, const vector<set<int> >& largetemp) {
    if (temp.empty()) {
        return false;
    }

    for (set<int>::const_iterator removed = temp.begin();
         removed != temp.end(); ++removed) {
        set<int> tempminusone(temp);
        tempminusone.erase(*removed);

        if (find(largetemp.begin(), largetemp.end(), tempminusone) == largetemp.end()) {
            return false;
        }
    }
    return true;
}

// Generates the candidate k-itemsets from L[k-1].
//
// Every pair of itemsets in `largetemp` is joined with joint(); each
// successfully joined itemset that survives prune() is appended to
// `candidate`. Existing contents of `candidate` are kept.
void apriorigen(const vector<set<int> >& largetemp, vector<set<int> >& candidate) {
    const vector<set<int> >::size_type count = largetemp.size();

    // `j = i + 1` makes the loops a no-op for zero or one itemsets, so no
    // unsigned `count - 1` is ever computed.
    for (vector<set<int> >::size_type i = 0; i < count; ++i) {
        for (vector<set<int> >::size_type j = i + 1; j < count; ++j) {
            set<int> temp;
            if (joint(largetemp[i], largetemp[j], temp) && prune(temp, largetemp)) {
                candidate.push_back(temp);
            }
        }
    }
}

// Counts the support of every candidate and builds L[k].
//
// For each itemset in `candidate`, counts the transactions in `database` that
// contain all of its items. Candidates whose support is >= `minsupport` are
// appended to `largek` and recorded in `lm` as <itemset, support>.
void subset(const vector<set<int> >& database,
            const vector<set<int> >& candidate,
            vector<set<int> >& largek,
            unsigned int minsupport,
            map<set<int>, int>& lm) {
    for (vector<set<int> >::const_iterator can = candidate.begin();
         can != candidate.end(); ++can) {
        unsigned int cancount = 0;

        for (vector<set<int> >::const_iterator data = database.begin();
             data != database.end(); ++data) {
            // A candidate larger than the transaction cannot be contained in it.
            if (can->size() > data->size()) {
                continue;
            }
            // Both ranges are sorted (std::set), as std::includes requires.
            if (includes(data->begin(), data->end(), can->begin(), can->end())) {
                ++cancount;
            }
        }

        if (cancount >= minsupport) {
            largek.push_back(*can);
            lm.insert(make_pair(*can, static_cast<int>(cancount)));
        }
    }
}

// Entry point.
//
// Usage: program [database-file [minimum-support]]
//   database-file    defaults to "retail.dat"
//   minimum-support  absolute transaction count, defaults to 5000
//
// Prints L[1], L[2], ... until a level turns out to be empty, followed by the
// CPU time consumed. Returns 0 on success and 1 when the arguments are
// invalid or the database file cannot be read.
int main(int argc, char* argv[]) {
    const char* filename = (argc > 1) ? argv[1] : "retail.dat";

    unsigned int minsupport = 5000;  // minimum support
    if (argc > 2) {
        istringstream parser(argv[2]);
        long requested = 0;
        if (!(parser >> requested) || requested <= 0) {
            cout << "invalid minimum support: " << argv[2] << endl;
            return 1;
        }
        minsupport = static_cast<unsigned int>(requested);
    }

    // liss[k-1] maps every large k-itemset to its support.
    vector<map<set<int>, int> > liss;

    const clock_t start = clock();

    vector<set<int> > database;
    if (!obtaindatabase(database, filename)) {
        return 1;
    }

    vector<set<int> > large1;
    map<set<int>, int> lm1;
    createitemset(database, large1, minsupport, lm1);
    liss.push_back(lm1);

    unsigned int k = 1;
    vector<set<int> > largetemp = large1;
    while (!largetemp.empty()) {
        outputlargeitemset(largetemp, k);
        ++k;

        vector<set<int> > candidate;
        apriorigen(largetemp, candidate);

        vector<set<int> > largek;
        map<set<int>, int> lm;
        subset(database, candidate, largek, minsupport, lm);

        largetemp.swap(largek);
        if (largetemp.empty()) {
            cout << "L[" << k << "] is empty" << endl;
        } else {
            liss.push_back(lm);
        }
    }

    const clock_t end = clock();
    // clock() counts implementation-defined ticks; convert them to milliseconds.
    const double elapsedms = 1000.0 * static_cast<double>(end - start) / CLOCKS_PER_SEC;
    cout << "finish! Total time: " << elapsedms << " ms" << endl;

#ifdef _WIN32
    // Keep the console window open when launched from an IDE or by double-click.
    system("pause");
#endif
    return 0;
}