Apriori algorithm-java

Source: Internet
Author: User

package com.yang;

import java.util.*;

/**
 * A small, string-based implementation of the Apriori algorithm.
 *
 * <p>Every transaction is a {@code String} and every character of it is one item. An itemset is
 * likewise a {@code String} whose characters are in ascending order. The class finds the frequent
 * itemsets level by level and then derives association rules {@code X -> Y} from every frequent
 * itemset that has at least two items.
 *
 * <p>Items are compared as raw {@code char} values, so they are case-sensitive.
 */
public class Apriori {

    /**
     * Transactions used by the no-argument constructor.
     *
     * <p>NOTE(review): the mixed letter case in these literals looks like extraction damage in the
     * place this code was copied from; because items are case-sensitive, {@code "b"} and
     * {@code "B"} are different items. Confirm the intended data.
     *
     * <p>An alternative sample set that accompanied this one:
     * {@code {"abc", "abc", "Acde", "BCDF", "ABCD", "ABCDF"}}.
     */
    private static final String[] DEFAULT_TRANSACTIONS =
            {"Abe", "BD", "BC", "Abd", "AC", "BC", "AC", "ABCE", "ABC"};

    /** Minimum support, as a fraction of the number of transactions. */
    private final double minSup = 0.2;

    /** Minimum confidence a rule must reach to be kept. */
    private final double minConf = 0.2;

    /**
     * Generated rules in generation order. A list is used (rather than a map) because the same
     * antecedent can occur in several rules and must not overwrite an earlier one.
     */
    private final List<Rule> rules = new ArrayList<>();

    /** The transaction collection. */
    private final String[] transSet;

    /** Size of the candidate 1-itemset, i.e. the number of distinct items. */
    private int itemCounts = 0;

    /** Frequent itemsets per level; index 0 holds the frequent 1-itemsets. */
    private final List<TreeSet<String>> frequencySet = new ArrayList<>();

    /** All frequent itemsets that contain at least two items. */
    private final TreeSet<String> maxFrequency = new TreeSet<>();

    /** The candidate 1-itemset (every distinct item, sorted). */
    private final TreeSet<String> candidate = new TreeSet<>();

    /** Candidate itemsets per level; index 0 holds the candidate 1-itemsets. */
    private final List<TreeSet<String>> candidateSet = new ArrayList<>();

    /** Index of the first empty level in {@link #frequencySet}; set by {@link #run()}. */
    private int frequencyIndex;

    /** Creates an instance over the built-in sample transactions. */
    public Apriori() {
        this(DEFAULT_TRANSACTIONS);
    }

    /**
     * Creates an instance over the given transactions.
     *
     * @param transactions the transactions, one string per transaction, one character per item
     * @throws NullPointerException if {@code transactions} is {@code null}
     */
    public Apriori(String[] transactions) {
        Objects.requireNonNull(transactions, "transactions");
        transSet = transactions.clone();

        itemCounts = collectItems();
        System.out.print("Size of the 1 itemsets" + itemCounts);

        // An itemset can hold at most itemCounts items, so the level loop in run() stops no later
        // than level itemCounts + 1 (index itemCounts). Two extra slots also cover the case of
        // zero items, where run() still touches index 1.
        for (int i = 0; i < itemCounts + 2; i++) {
            frequencySet.add(new TreeSet<>());
            candidateSet.add(new TreeSet<>());
        }
        candidateSet.set(0, candidate);
    }

    /**
     * Program entry point: runs the algorithm on the built-in sample transactions.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Apriori ap = new Apriori();
        ap.run();
    }

    /**
     * Runs the whole pipeline: frequent 1-itemsets, then candidate and frequent k-itemsets for
     * increasing k until a level is empty, then prints the levels, collects the frequent itemsets
     * and generates and prints the association rules.
     */
    public void run() {
        int k = 1;
        item1_gen();

        do {
            k++;
            canditate_gen(k);
            frequent_gen(k);
        } while (!is_frequent_empty(k));

        frequencyIndex = k - 1;
        print_canditate();
        maxfrequent_gen();
        print_maxfrequent();
        rulegen();
        ruleprint();
    }

    /**
     * Counts the transactions that contain every item (character) of {@code x}.
     *
     * @param x the itemset; the order of its characters does not matter
     * @return the number of matching transactions; 0 for an empty {@code x}
     */
    public double count_sup(String x) {
        int matches = 0;
        if (x.isEmpty()) {
            return matches;
        }
        for (String transaction : transSet) {
            if (containsAllItems(transaction, x)) {
                matches++;
            }
        }
        return matches;
    }

    /** Returns whether every character of {@code items} occurs in {@code transaction}. */
    private static boolean containsAllItems(String transaction, String items) {
        for (int j = 0; j < items.length(); j++) {
            if (transaction.indexOf(items.charAt(j)) == -1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Collects every distinct item of the transactions into the candidate 1-itemset.
     *
     * @return the number of distinct items
     */
    public int counts() {
        return collectItems();
    }

    /** Adds every character of every transaction to {@link #candidate}; returns its size. */
    private int collectItems() {
        for (String transaction : transSet) {
            for (int j = 0; j < transaction.length(); j++) {
                // The set drops duplicates and keeps the items sorted.
                candidate.add(String.valueOf(transaction.charAt(j)));
            }
        }
        return candidate.size();
    }

    /** Builds the frequent 1-itemsets from the candidate 1-itemsets. */
    public void item1_gen() {
        double threshold = minSup * transSet.length;
        TreeSet<String> frequentItems = frequencySet.get(0);
        for (String item : candidateSet.get(0)) {
            if (count_sup(item) >= threshold) {
                frequentItems.add(item);
            }
        }
    }

    /**
     * Builds the candidate k-itemsets by appending a frequent single item to each frequent
     * (k-1)-itemset.
     *
     * <p>An item is appended only if it is greater than the last character of the itemset, so
     * candidates are always in ascending order ("abd" is produced, "adb" never is).
     *
     * @param k the level to build, at least 2
     */
    public void canditate_gen(int k) {
        TreeSet<String> generated = new TreeSet<>();
        TreeSet<String> frequentItems = frequencySet.get(0);

        for (String base : frequencySet.get(k - 2)) {
            char last = base.charAt(base.length() - 1);
            for (String item : frequentItems) {
                if (last < item.charAt(0)) {
                    generated.add(base + item);
                }
            }
        }
        candidateSet.set(k - 1, generated);
    }

    /**
     * Copies every candidate k-itemset that reaches the minimum support into the frequent
     * k-itemsets.
     *
     * @param k the level to filter
     */
    public void frequent_gen(int k) {
        double threshold = minSup * transSet.length;
        TreeSet<String> frequent = frequencySet.get(k - 1);
        for (String itemset : candidateSet.get(k - 1)) {
            if (count_sup(itemset) >= threshold) {
                frequent.add(itemset);
            }
        }
    }

    /**
     * Tells whether there are no frequent k-itemsets.
     *
     * @param k the level to check
     * @return {@code true} if the frequent k-itemset collection is empty
     */
    public boolean is_frequent_empty(int k) {
        return frequencySet.get(k - 1).isEmpty();
    }

    /**
     * Prints the candidate and frequent itemsets of each level. The number of levels printed
     * equals the number of frequent 1-itemsets.
     */
    public void print_canditate() {
        int levels = frequencySet.get(0).size();
        for (int i = 0; i < levels; i++) {
            System.out.print("Candidate set" + (i + 1) + ":");
            for (String itemset : candidateSet.get(i)) {
                System.out.print(itemset + "\t");
            }
            System.out.print("\n" + "frequent set" + (i + 1) + ":");
            for (String itemset : frequencySet.get(i)) {
                System.out.print(itemset + "\t");
            }
            System.out.println();
        }
    }

    /**
     * Collects all frequent itemsets with at least two items (levels 2 up to the last non-empty
     * level) into one set. Relies on the level index stored by {@link #run()}.
     */
    public void maxfrequent_gen() {
        for (int i = 1; i < frequencyIndex; i++) {
            maxFrequency.addAll(frequencySet.get(i));
        }
    }

    /** Prints the collected frequent itemsets followed by a blank line. */
    public void print_maxfrequent() {
        System.out.print("Frequent item set:");
        for (String itemset : maxFrequency) {
            System.out.print(itemset + "\t");
        }
        System.out.println();
        System.out.println();
    }

    /** Generates association rules from every collected frequent itemset. */
    public void rulegen() {
        for (String itemset : maxFrequency) {
            subgen(itemset);
        }
    }

    /**
     * Generates the rules {@code X -> Y} for one itemset and keeps those that reach the minimum
     * confidence.
     *
     * <p>Each value of a bit mask between 1 and 2^n - 2 splits {@code s} into two non-empty,
     * non-overlapping parts: characters whose bit is set form X, the rest form Y.
     *
     * @param s the itemset to split
     */
    public void subgen(String s) {
        int length = s.length();
        int allItems = (1 << length) - 1;

        for (int mask = 1; mask < allItems; mask++) {
            StringBuilder selected = new StringBuilder();
            StringBuilder remaining = new StringBuilder();
            for (int j = 0; j < length; j++) {
                if (((1 << j) & mask) != 0) {
                    selected.append(s.charAt(j));
                } else {
                    remaining.append(s.charAt(j));
                }
            }

            String x = selected.toString();
            String y = remaining.toString();
            // Floating-point division: an unsupported X yields NaN, which fails the comparison.
            if (count_sup(x + y) / count_sup(x) >= minConf) {
                rules.add(new Rule(x, y));
            }
        }
    }

    /** Prints every generated rule together with its confidence. */
    public void ruleprint() {
        System.out.println("Association Rules:");
        for (Rule rule : rules) {
            String x = rule.antecedent;
            String y = rule.consequent;
            float confidence = (float) (count_sup(x + y) / count_sup(x));
            System.out.println(
                    x + (x.length() < 5 ? "\t" : "") + "-" + y + "\t" + "confidence:" + confidence);
        }
    }

    /** One association rule: antecedent X and consequent Y. */
    private static final class Rule {
        final String antecedent;
        final String consequent;

        Rule(String antecedent, String consequent) {
            this.antecedent = antecedent;
            this.consequent = consequent;
        }
    }
}
Learning points: 1. TreeSet.add automatically removes duplicates (adding an element that is already present has no effect).

2. TreeSet[] frequencySet; and TreeSet frequencySet[]; are two ways of declaring an array of TreeSet; they are equivalent.

3. In canditate_gen, an item is appended to an itemset only when it is greater than the itemset's last character, so candidates are always built in ascending order: ABD is generated, but ADB never is.

4. When the association rules are generated, 1 is shifted left by each position and tested against a counter, so the counter's bits select which items go into X. Counting through all values generates every combination, and X and Y never overlap.

Apriori algorithm-java

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.