Data mining - association rule mining: an implementation of the Apriori algorithm

Source: Internet
Author: User

For details of the algorithm, see the paper "Fast Algorithms for Mining Association Rules" (Agrawal and Srikant, 1994).

The graphical-version project and its test cases can be downloaded here: http://download.csdn.net/detail/michealtx/4266155

The console-version C++ code is as follows:

// Apriori frequent-itemset mining (console version).
//
// Input: a text file with one transaction per line, each transaction being a
// whitespace-separated list of integer item ids.
// Output: for every level k, the frequent k-itemsets, i.e. those contained in
// at least `minsupport` transactions.
//
// Usage: program [transaction-file] [min-support]
// Both arguments are optional; the defaults are "retail.dat" and 5000.

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

typedef std::set<int> itemset;            // one transaction or one itemset
typedef std::vector<itemset> itemsetlist; // the database, L[k] or C[k]
typedef std::map<itemset, int> supportmap; // itemset -> support count

// Reads the transaction file `filename` and appends one itemset per line to
// `database`. Prints the number of lines read.
// Returns false (after printing a message) when the file cannot be opened.
bool obtaindatabase(itemsetlist &database, const char *filename)
{
    std::ifstream in;
    if (filename != 0) {
        in.open(filename);
    }
    if (!in) {
        std::cout << "file opening failed! " << std::endl;
        return false;
    }

    unsigned int linecount = 0;
    std::string line;
    while (std::getline(in, line)) {
        ++linecount;
        // Stream extraction handles any amount of whitespace between items and
        // does not require a trailing separator after the last item of a line.
        std::istringstream fields(line);
        itemset transaction;
        int item = 0;
        while (fields >> item) {
            transaction.insert(item);
        }
        database.push_back(transaction);
    }
    std::cout << linecount << std::endl;
    return true;
}

// Scans the database once and builds L[1]: every single item whose number of
// occurrences is at least `minsupport` is appended to `largeitemset` as a
// one-element set, and its count is recorded in `lm1`.
void createitemset(itemsetlist &database, itemsetlist &largeitemset,
                   unsigned int minsupport, supportmap &lm1)
{
    // item -> number of transactions containing it
    std::map<int, unsigned int> counts;
    for (itemsetlist::const_iterator t = database.begin(); t != database.end(); ++t) {
        for (itemset::const_iterator i = t->begin(); i != t->end(); ++i) {
            ++counts[*i];
        }
    }

    for (std::map<int, unsigned int>::const_iterator c = counts.begin();
         c != counts.end(); ++c) {
        if (c->second >= minsupport) {
            itemset single;
            single.insert(c->first);
            largeitemset.push_back(single);
            lm1.insert(supportmap::value_type(single, static_cast<int>(c->second)));
        }
    }
}

// Prints the frequent i-itemsets in `largeitemset`, four sets per output line.
void outputlargeitemset(itemsetlist &largeitemset, unsigned int i)
{
    std::cout << "include" << largeitemset.size() << "item" << i
              << "-item set:" << std::endl;

    int printed = 0;
    for (itemsetlist::const_iterator s = largeitemset.begin();
         s != largeitemset.end(); ++s) {
        std::cout << "{";
        for (itemset::const_iterator item = s->begin(); item != s->end(); ++item) {
            std::cout << *item << " ";
        }
        std::cout << "} ";
        ++printed;
        if (printed % 4 == 0) {
            std::cout << std::endl;
        }
    }
    std::cout << std::endl;
}

// Join step. Two (k-1)-itemsets are joinable when they have the same size,
// agree on their first k-2 items and differ in their last item. On success the
// k items of the union are inserted into `temp` and true is returned; on
// failure `temp` is left untouched and false is returned.
bool joint(itemset &recordi, itemset &recordj, itemset &temp)
{
    // The empty check also protects the "last element" iterators below.
    if (recordi.empty() || recordi.size() != recordj.size()) {
        return false;
    }

    itemset::const_iterator lasti = recordi.end();
    --lasti;
    itemset::const_iterator lastj = recordj.end();
    --lastj;

    if (*lasti == *lastj) {
        return false;
    }
    if (!std::equal(recordi.begin(), itemset::const_iterator(lasti), recordj.begin())) {
        return false;
    }

    temp.insert(recordi.begin(), recordi.end());
    temp.insert(*lastj);
    return true;
}

// Prune step. Returns true when the candidate `temp` survives, i.e. when every
// subset obtained by removing exactly one item from `temp` is present in
// `largetemp` (L[k-1]). Returns false as soon as one such subset is missing,
// and also for an empty `temp`.
bool prune(itemset &temp, itemsetlist &largetemp)
{
    if (temp.empty()) {
        return false;
    }
    for (itemset::const_iterator skipped = temp.begin(); skipped != temp.end(); ++skipped) {
        itemset sub(temp);
        sub.erase(*skipped);
        if (std::find(largetemp.begin(), largetemp.end(), sub) == largetemp.end()) {
            return false; // an infrequent subset makes the candidate infrequent
        }
    }
    return true;
}

// Generates the candidate k-itemsets from L[k-1] (`largetemp`) by joining every
// pair of itemsets and keeping the joined sets that pass the prune step.
// Surviving candidates are appended to `candidate`. One progress line
// ("<size of L[k-1]> <index>") is printed per outer iteration.
void apriorigen(itemsetlist &largetemp, itemsetlist &candidate)
{
    const std::size_t total = largetemp.size();
    // "i + 1 < total" instead of "i < total - 1": no wrap-around when empty.
    for (std::size_t i = 0; i + 1 < total; ++i) {
        std::cout << total << " " << i << std::endl;
        for (std::size_t j = i + 1; j < total; ++j) {
            itemset temp;
            if (joint(largetemp[i], largetemp[j], temp)
                && prune(temp, largetemp)
                && !temp.empty()) {
                candidate.push_back(temp);
            }
        }
    }
}

// Counts, for every itemset in `candidate`, the transactions of `database` that
// contain it. Candidates whose count is at least `minsupport` are appended to
// `largek` (L[k]) and recorded with their count in `lm`.
void subset(itemsetlist &database, itemsetlist &candidate, itemsetlist &largek,
            unsigned int minsupport, supportmap &lm)
{
    for (itemsetlist::const_iterator can = candidate.begin();
         can != candidate.end(); ++can) {
        unsigned int cancount = 0;
        for (itemsetlist::const_iterator data = database.begin();
             data != database.end(); ++data) {
            // A candidate larger than the transaction cannot be contained in it.
            // Both ranges are sorted (std::set), as std::includes requires.
            if (can->size() <= data->size()
                && std::includes(data->begin(), data->end(), can->begin(), can->end())) {
                ++cancount;
            }
        }
        if (cancount >= minsupport) {
            largek.push_back(*can);
            lm.insert(supportmap::value_type(*can, static_cast<int>(cancount)));
        }
    }
}

// Keeps a Windows console window open until a key is pressed. The "pause"
// command only exists on Windows, so this is a no-op elsewhere.
static void pauseconsole()
{
#ifdef _WIN32
    std::system("pause");
#endif
}

// Entry point: loads the database, builds L[1], then alternates candidate
// generation and support counting until no frequent itemsets remain.
int main(int argc, char *argv[])
{
    const char *filename = "retail.dat";
    int minsupport = 5000; // minimum support, as an absolute transaction count

    if (argc > 1) {
        filename = argv[1];
    }
    if (argc > 2) {
        std::istringstream parser(argv[2]);
        if (!(parser >> minsupport) || minsupport < 1) {
            std::cerr << "usage: " << argv[0]
                      << " [transaction-file] [min-support >= 1]" << std::endl;
            return 1;
        }
    }

    // Support counts of the frequent itemsets, one map per level k.
    // Collected here but not read again by this program.
    std::vector<supportmap> Liss;

    const std::clock_t start = std::clock();

    itemsetlist database;
    if (!obtaindatabase(database, filename)) {
        pauseconsole();
        return 1;
    }

    itemsetlist large1;
    supportmap lm1;
    createitemset(database, large1, static_cast<unsigned int>(minsupport), lm1);
    Liss.push_back(lm1);

    int k = 1;
    itemsetlist largetemp = large1;
    while (!largetemp.empty()) {
        outputlargeitemset(largetemp, static_cast<unsigned int>(k));
        ++k;

        itemsetlist candidate;
        apriorigen(largetemp, candidate);

        itemsetlist largek;
        supportmap lm;
        subset(database, candidate, largek, static_cast<unsigned int>(minsupport), lm);

        largetemp = largek;
        if (largetemp.empty()) {
            std::cout << "L [" << k << "] is empty" << std::endl;
        } else {
            Liss.push_back(lm);
        }
    }

    const std::clock_t end = std::clock();
    // clock() counts in CLOCKS_PER_SEC units, which is not milliseconds on
    // every platform; convert explicitly.
    const long elapsedms =
        static_cast<long>((end - start) * 1000.0 / CLOCKS_PER_SEC);
    std::cout << "finish! Total time: " << elapsedms << " Ms " << std::endl;

    pauseconsole();
    return 0;
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.