Java implementation example of the Apriori algorithm

Source: Internet
Author: User

// NOTE: the original listing declared a placeholder package ("xx"). Add the
// package declaration that matches this file's location before using it.
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Example implementation of the Apriori algorithm: finds all frequent item sets in a
 * collection of transactions and prints the association rules derived from them.
 */
public class Apriori {

    /**
     * Runs the algorithm on a small built-in transaction set, printing every frequent
     * item set with its frequency and then every association rule whose confidence
     * reaches the minimum confidence.
     *
     * @param args unused
     * @throws Exception declared for compatibility with the original listing
     */
    public static void main(String[] args) throws Exception {
        // Initialize the transaction set.
        List<Set<String>> trans = new LinkedList<Set<String>>();
        trans.add(new ItemSet(new String[] {"I1", "I2", "I5"}));
        trans.add(new ItemSet(new String[] {"I2", "I4"}));
        trans.add(new ItemSet(new String[] {"I2", "I3"}));
        trans.add(new ItemSet(new String[] {"I1", "I2", "I4"}));
        trans.add(new ItemSet(new String[] {"I1", "I3"}));
        trans.add(new ItemSet(new String[] {"I2", "I3"}));
        trans.add(new ItemSet(new String[] {"I1", "I3"}));
        trans.add(new ItemSet(new String[] {"I1", "I2", "I3", "I5"}));
        trans.add(new ItemSet(new String[] {"I1", "I2", "I3"}));

        int MSF = 2; // minimum support frequency
        Map<Integer, Set<ItemSet>> rst = findFrequentItemSets(trans, MSF);

        // Output the frequent item sets, grouped by size.
        System.out.println("Frequent Item Sets:");
        for (Entry<Integer, Set<ItemSet>> entry : rst.entrySet()) {
            Integer itemSetSize = entry.getKey();
            System.out.printf("Frequent %d Item Sets:\n", itemSetSize);
            for (ItemSet set : entry.getValue()) {
                System.out.printf("%s, %d\n", set, set.frequence);
            }
        }

        double MCONF = 0.6; // minimum confidence (60%)

        // Index every frequent item set by its content so that the stored instance
        // (which carries the frequency) can be looked up from an equal item set.
        Map<ItemSet, ItemSet> directMap = new HashMap<ItemSet, ItemSet>();
        for (Entry<Integer, Set<ItemSet>> entry : rst.entrySet()) {
            for (ItemSet set : entry.getValue()) {
                directMap.put(set, set);
            }
        }

        // Construct the association rules from the frequent item sets.
        System.out.println();
        System.out.println("Association Rules:");
        for (Entry<Integer, Set<ItemSet>> entry : rst.entrySet()) {
            for (ItemSet set : entry.getValue()) {
                // Kept as double so the division below is floating-point.
                double cnt1 = directMap.get(set).frequence;
                List<ItemSet> subSets = set.listNotEmptySubItemSets();
                for (ItemSet subSet : subSets) {
                    int cnt2 = directMap.get(subSet).frequence;
                    double conf = cnt1 / cnt2;
                    if (conf >= MCONF) {
                        ItemSet remainSet = new ItemSet();
                        remainSet.addAll(set);
                        remainSet.removeAll(subSet);
                        System.out.printf("%s => %s, %.2f\n", subSet, remainSet, conf);
                    }
                }
            }
        }
    }

    /**
     * Searches the transaction set for all frequent item sets.
     *
     * @param transIterable the transactions; iterated once per item-set size
     * @param MSF the minimum support frequency an item set needs to count as frequent
     * @return a sorted map from item-set size L to the set of frequent L-item sets; the
     *     entry for size 1 is always present (possibly empty), larger sizes only when
     *     at least one frequent item set of that size exists
     */
    static Map<Integer, Set<ItemSet>> findFrequentItemSets(
            Iterable<Set<String>> transIterable, int MSF) {
        Map<Integer, Set<ItemSet>> ret = new TreeMap<Integer, Set<ItemSet>>();

        // First determine the frequent 1-item sets.
        Iterator<Set<String>> it = transIterable.iterator();
        Set<ItemSet> oneItemSets = findFrequentOneItemSets(it, MSF);
        ret.put(1, oneItemSets);

        int preItemSetSize = 1;
        Set<ItemSet> preItemSets = oneItemSets;

        // Derive the frequent L-item sets from the frequent (L-1)-item sets until a
        // level produces no frequent item set.
        while (!preItemSets.isEmpty()) {
            int curItemSetSize = preItemSetSize + 1;

            // Candidate L-item sets built from the frequent (L-1)-item sets.
            List<ItemSet> candidates = aprioriGenCandidates(preItemSets);

            // Scan the transactions to count how often each candidate occurs.
            it = transIterable.iterator();
            while (it.hasNext()) {
                Set<String> tran = it.next();
                for (ItemSet candidate : candidates) {
                    if (tran.containsAll(candidate)) {
                        candidate.frequence++;
                    }
                }
            }

            // Keep the candidates whose frequency reaches the minimum support.
            Set<ItemSet> curItemSets = new HashSet<ItemSet>();
            for (ItemSet candidate : candidates) {
                if (candidate.frequence >= MSF) {
                    curItemSets.add(candidate);
                }
            }
            if (!curItemSets.isEmpty()) {
                ret.put(curItemSetSize, curItemSets);
            }

            preItemSetSize = curItemSetSize;
            preItemSets = curItemSets;
        }
        return ret;
    }

    /**
     * Scans the transactions to determine the frequent 1-item sets.
     *
     * @param trans iterator over the transactions; consumed completely
     * @param MSF the minimum support frequency
     * @return the single-item sets whose item occurs in at least {@code MSF}
     *     transactions, each with {@code frequence} set to that count
     */
    static Set<ItemSet> findFrequentOneItemSets(Iterator<Set<String>> trans, int MSF) {
        // Count the occurrences of every individual item.
        Map<String, Integer> frequences = new HashMap<String, Integer>();
        while (trans.hasNext()) {
            Set<String> tran = trans.next();
            for (String item : tran) {
                Integer frequence = frequences.get(item);
                frequence = frequence == null ? 1 : frequence + 1;
                frequences.put(item, frequence);
            }
        }

        // Build the set of frequent 1-item sets.
        Set<ItemSet> ret = new HashSet<ItemSet>();
        for (Entry<String, Integer> entry : frequences.entrySet()) {
            String item = entry.getKey();
            Integer frequence = entry.getValue();
            if (frequence >= MSF) {
                ItemSet set = new ItemSet(new String[] {item});
                set.frequence = frequence;
                ret.add(set);
            }
        }
        return ret;
    }

    /**
     * Generates the candidate L-item sets from all frequent (L-1)-item sets by joining
     * pairs of them and pruning every union that has an infrequent (L-1)-subset.
     *
     * @param preItemSets all frequent (L-1)-item sets
     * @return the candidate L-item sets, each with {@code frequence} still 0
     */
    static List<ItemSet> aprioriGenCandidates(Set<ItemSet> preItemSets) {
        List<ItemSet> ret = new LinkedList<ItemSet>();
        for (ItemSet set1 : preItemSets) {
            for (ItemSet set2 : preItemSets) {
                // Identity comparison is intended: only skip pairing an element
                // with itself.
                if (set1 != set2 && set1.canMakeJoin(set2)) {
                    // Join step.
                    ItemSet union = new ItemSet();
                    union.addAll(set1);
                    union.add(set2.last());

                    // Prune step: every (L-1)-subset must itself be frequent.
                    boolean missSubSet = false;
                    List<ItemSet> subItemSets = union.listDirectSubItemSets();
                    for (ItemSet itemSet : subItemSets) {
                        if (!preItemSets.contains(itemSet)) {
                            missSubSet = true;
                            break;
                        }
                    }
                    if (!missSubSet) {
                        ret.add(union);
                    }
                }
            }
        }
        return ret;
    }

    /**
     * A set of items, each item being a string. Extends {@link TreeSet} so that the
     * items are kept in sorted order, and adds helper operations used by the algorithm.
     */
    static class ItemSet extends TreeSet<String> {
        private static final long serialVersionUID = 23883315835136949L;

        /** Occurrence frequency of this item set. */
        int frequence;

        /** Creates an empty item set. */
        public ItemSet() {
            this(new String[0]);
        }

        /**
         * Creates an item set containing the given items.
         *
         * @param items the items to add
         */
        public ItemSet(String[] items) {
            for (String item : items) {
                add(item);
            }
        }

        /**
         * Tests whether this item set (of size L-1) can be joined with another item set
         * to produce an L-item set.
         *
         * @param other the item set to join with
         * @return {@code true} only when both sets have the same size, all items before
         *     the last one are equal, and this set's last item is smaller than the
         *     other set's last item
         */
        public boolean canMakeJoin(ItemSet other) {
            // Item sets of different sizes cannot be joined.
            if (other.size() != this.size()) {
                return false;
            }

            Iterator<String> it1 = this.iterator();
            Iterator<String> it2 = other.iterator();
            while (it1.hasNext()) {
                String item1 = it1.next();
                String item2 = it2.next();
                int result = item1.compareTo(item2);
                if (result != 0) {
                    // A difference before the last position rules the join out.
                    if (it1.hasNext()) {
                        return false;
                    }
                    // Requiring "smaller" means each pair is joined in one direction only.
                    return result < 0;
                }
            }
            // The two sets are equal.
            return false;
        }

        /**
         * Lists every sub item set obtained by removing exactly one item from this set.
         *
         * @return the sub item sets of size {@code size() - 1}; empty when this set has
         *     fewer than two items
         */
        public List<ItemSet> listDirectSubItemSets() {
            List<ItemSet> ret = new LinkedList<ItemSet>();
            // A non-empty sub item set exists only when this set has more than one item.
            if (size() > 1) {
                for (String rmItem : this) {
                    ItemSet subSet = new ItemSet();
                    subSet.addAll(this);
                    subSet.remove(rmItem);
                    ret.add(subSet);
                }
            }
            return ret;
        }

        /**
         * Lists all non-empty sub item sets of this item set, excluding the set itself.
         *
         * @return the non-empty proper subsets of this item set
         */
        public List<ItemSet> listNotEmptySubItemSets() {
            List<ItemSet> ret = new LinkedList<ItemSet>();
            int size = size();
            if (size > 0) {
                // mapping[i] == '1' means the i-th item (in sorted order) is selected.
                char[] mapping = new char[size];
                initMapping(mapping);
                while (nextMapping(mapping)) {
                    ItemSet set = new ItemSet();
                    Iterator<String> it = this.iterator();
                    for (int i = 0; i < size; i++) {
                        String item = it.next();
                        if (mapping[i] == '1') {
                            set.add(item);
                        }
                    }
                    // Skip the selection that reproduces the whole set.
                    if (set.size() < size) {
                        ret.add(set);
                    }
                }
            }
            return ret;
        }

        /** Resets every position of the selection mapping to {@code '0'}. */
        private void initMapping(char[] mapping) {
            for (int i = 0; i < mapping.length; i++) {
                mapping[i] = '0';
            }
        }

        /**
         * Advances the selection mapping to the next combination, treating it as a
         * binary counter whose lowest digit is at index 0.
         *
         * @return {@code false} once the counter overflows (all positions were
         *     {@code '1'}), {@code true} otherwise
         */
        private boolean nextMapping(char[] mapping) {
            int pos = 0;
            while (pos < mapping.length && mapping[pos] == '1') {
                mapping[pos] = '0';
                pos++;
            }
            if (pos < mapping.length) {
                mapping[pos] = '1';
                return true;
            }
            return false;
        }
    }
}

 

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.