Java implementation of the association algorithm Apriori, the database uses Redis

Source: Internet
Author: User
Tags addall new set

This program uses Apriori to perform lexical association analysis on a set of specialized articles. It is not the best application of Apriori; it is really an exercise in frequency analysis of CI.
package com.my.analysis;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

import redis.clients.jedis.Jedis;

/**
 * Apriori-style association analysis over the word sets of a collection of
 * articles stored in Redis.
 *
 * <p>Each article is represented by the set of words read from Redis in the
 * constructor. Starting from a fixed list of seed words, the class alternates
 * between filtering candidate itemsets by minimum support and growing the
 * surviving frequent itemsets by one word, then prints the rules whose
 * confidence exceeds {@code minconf}.
 *
 * <p>NOTE(review): this listing was reconstructed from a copy whose identifier
 * casing was lost. Method names and the field names of
 * {@code AnsjTxtFileParserForRedis} ({@code fileList}, {@code filePrefix},
 * {@code tableName}) are best guesses -- confirm against the real project.
 */
public class AprioriMyImpl {

    /** Minimum support (fraction of articles) for an itemset to be frequent. */
    private double minsup = 0.3;

    /** Minimum confidence a rule must exceed to be printed. */
    private double minconf = 0.99;

    /** Number of top words to use; only referenced by the commented-out Redis-based initialiser. */
    private int limitword = 100;

    /** Word set of every article, in file-index order. */
    private ArrayList<Set<String>> aricleWL;

    /** Candidate itemsets; index k holds the candidates of size k+1. */
    private ArrayList<Set<Set<String>>> candidateList;

    /** Frequent itemsets; index k holds the frequent itemsets of size k+1. */
    private ArrayList<Set<Set<String>>> frequencyList;

    /** All non-empty subsets of the itemset most recently passed to {@link #genSub(Set)}. */
    public Set<Set<String>> allSub = new HashSet<Set<String>>();

    /** Total number of article files. */
    private long filecount;

    /** Current round, 1-based. */
    private int step = 1;

    private Jedis jedis = new Jedis("localhost", 6379);

    /**
     * Loads the word set of every article from Redis.
     *
     * <p>The article count is the length of the Redis list named by
     * {@code AnsjTxtFileParserForRedis.fileList}; article {@code i} is the Redis
     * set stored under {@code filePrefix + i}.
     */
    public AprioriMyImpl() {
        candidateList = new ArrayList<Set<Set<String>>>();
        frequencyList = new ArrayList<Set<Set<String>>>();
        aricleWL = new ArrayList<Set<String>>();
        // NOTE(review): field casing of AnsjTxtFileParserForRedis is reconstructed -- confirm.
        filecount = jedis.llen(AnsjTxtFileParserForRedis.fileList);
        for (int i = 0; i < filecount; i++) {
            aricleWL.add(jedis.smembers(AnsjTxtFileParserForRedis.filePrefix + i));
        }
    }

    // Alternative initialiser kept from the original listing: seed the first
    // candidate set with the top `limitword` words of a Redis sorted set.
    //
    // public void item1_init() {
    //     Set<Set<String>> candidate1 = new HashSet<Set<String>>();
    //     Set<String> tset = jedis.zrevrange(AnsjTxtFileParserForRedis.tableName, 0, limitword - 1);
    //     for (String s : tset) {
    //         HashSet<String> one = new HashSet<String>();
    //         one.add(s);
    //         candidate1.add(one);
    //     }
    //     candidateList.add(candidate1);
    //     System.out.println("Candidate Set-" + (step) + ":");
    //     printSetSetString(candidate1);
    // }

    /**
     * Initialises the first candidate set with one single-word itemset per seed
     * word and prints it.
     *
     * <p>NOTE(review): the seed words below are copied from a machine-translated
     * listing in which the whitespace around the literals was garbled (and one
     * entry reads "hitSnoring"). They must equal the words actually stored in
     * Redis to match anything -- restore the original word list before use.
     */
    public void item1_init() {
        String[] keys = {
            "Sleep", "Time", "Baby", "cure", "disease", "body", "breath", "quality", "child",
            "Fall Asleep", "human body", "spirit", "Habit", "mental", "barrier", "pillow",
            "health", "Attention", "Doctor", "female", "Symptoms", "food", "diet", "Sport",
            "TCM", "mattress", "Child", "Baby", "reading", "Brain", "massage", "effect",
            "Epilepsy", "Environment", "nutrition", "pressure", "blood", "smart", "rest", "Mom",
            "Man", "physiology", "medicine", "society", "drug", "muscle", "male", "technology",
            "recovery", "weight loss", "relaxation", "nerves", "harm", "emotion", "pregnancy",
            "nap", "secretion", "descent", "feedback", "Music", "stimulation", "diabetes",
            "posture", "Old Man", "Stay Up", "Digest", "Memory", "eliminate", "Get Up",
            "customer", "Food", "cold", "hypertension", "recruit", "old", "pregnant", "watch",
            "solve", "phenomenon", "over", "Cervical", "whole Body", "Air conditioning", "side",
            "position", "body temperature", "Gold pen", "reach", "hitSnoring", "TV", "Energy",
            "hypnosis", "substance", "condition", "energy", "author", "Device", "price",
            "patient", "protection", "data", "experience", "body", "fit", "gynecological",
            "Exercise", "newborn", "cough", "depression", "blood vessel", "Inhibition", "infant",
            "Insomnia", "heart disease", "diet", "blood pressure", "tumor", "induced",
            "attention", "cardiovascular", "life", "urine", "immunity", "menstruation",
            "Evaluation", "Memory", "intelligence"
        };
        // Duplicated seed words collapse because the candidates are held in a set.
        Set<Set<String>> candidate1 = new HashSet<Set<String>>();
        for (String s : keys) {
            HashSet<String> one = new HashSet<String>();
            one.add(s);
            candidate1.add(one);
        }
        candidateList.add(candidate1);
        System.out.println("Candidate Set-" + (step) + ":");
        printSetSetString(candidate1);
    }

    /**
     * Filters the current round's candidates down to those whose support ratio
     * is at least {@code minsup}, records and prints them, and advances
     * {@code step}.
     *
     * @return {@code false} if no candidate is frequent (nothing is recorded and
     *         {@code step} is not advanced); {@code true} otherwise
     */
    public boolean candidateToFrequency() {
        Set<Set<String>> candItems = candidateList.get(step - 1);
        Set<Set<String>> freqItems = new HashSet<Set<String>>();
        for (Set<String> item : candItems) {
            // count_sup returns a double, so this is a floating-point ratio.
            if ((count_sup(item) / filecount) >= minsup) {
                freqItems.add(item);
            }
        }
        if (freqItems.size() == 0) {
            // No frequent itemset meets the threshold.
            return false;
        }
        frequencyList.add(freqItems);
        System.out.println("Frequent itemsets-" + (step) + ":");
        printSetSetString(freqItems);
        step++;
        return true;
    }

    /**
     * Builds the next candidate set from the most recent frequent itemsets.
     *
     * <p>Every frequent itemset is extended by each word that occurs in any
     * frequent itemset of this round; an extension is kept only if it really
     * grew by one word and all of its one-smaller subsets are frequent.
     *
     * @return {@code false} if no new candidate could be formed; {@code true}
     *         otherwise (the candidates are recorded and printed)
     */
    public boolean frequencyToCandidate() {
        // step was already advanced by candidateToFrequency, hence "- 2".
        Set<Set<String>> frequencyItems = frequencyList.get(step - 2);
        Set<String> maxSub = maxSubSet(frequencyItems);
        Set<Set<String>> candidateItems = new HashSet<Set<String>>();
        for (Set<String> freqs : frequencyItems) {
            int len = freqs.size();
            for (String sub : maxSub) {
                Set<String> pItem = new HashSet<String>();
                pItem.addAll(freqs);
                pItem.add(sub);
                if (pItem.size() == (len + 1) && subIsFreq(frequencyItems, pItem)) {
                    candidateItems.add(pItem);
                }
            }
        }
        if (candidateItems.size() == 0) {
            // No new candidate set could be formed.
            return false;
        }
        candidateList.add(candidateItems);
        System.out.println("Candidate Set-" + (step) + ":");
        printSetSetString(candidateItems);
        return true;
    }

    /**
     * Checks whether every subset of {@code parentSet} obtained by removing
     * exactly one element is contained in {@code freq}.
     *
     * @param freq      the frequent itemsets to look the subsets up in
     * @param parentSet the itemset whose one-smaller subsets are checked
     * @return {@code true} if all such subsets are in {@code freq}
     */
    public boolean subIsFreq(Set<Set<String>> freq, Set<String> parentSet) {
        for (String s : parentSet) {
            Set<String> item = new HashSet<String>();
            item.addAll(parentSet);
            item.remove(s);
            if (!freq.contains(item)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the union of all words appearing in the given itemsets.
     *
     * @param freqIntems the itemsets to flatten
     * @return a new set holding every word of every itemset
     */
    public Set<String> maxSubSet(Set<Set<String>> freqIntems) {
        Set<String> maxSub = new HashSet<String>();
        for (Set<String> ss : freqIntems) {
            maxSub.addAll(ss);
        }
        return maxSub;
    }

    /**
     * Counts the articles whose word set contains every word of {@code x}.
     *
     * @param x the itemset to count
     * @return the absolute number of containing articles, as a double
     */
    public double count_sup(Set<String> x) {
        int temp = 0;
        for (Set<String> ss : aricleWL) {
            if (ss.containsAll(x)) {
                temp++;
            }
        }
        return temp;
    }

    /**
     * Computes the confidence of the rule {@code x => y}: the support count of
     * {@code x ∪ y} divided by the support count of {@code x}.
     *
     * @param x the antecedent
     * @param y the consequent
     * @return the confidence; {@code NaN} when {@code x} occurs in no article
     */
    public double cout_cand(Set<String> x, Set<String> y) {
        Set<String> z = new HashSet<String>();
        z.addAll(x);
        z.addAll(y);
        return count_sup(z) / count_sup(x);
    }

    /**
     * Adds {@code parent} and all of its non-empty subsets to {@link #allSub}.
     *
     * <p>A set already present in {@code allSub} is treated as fully expanded
     * and skipped, so each distinct subset is expanded once instead of once per
     * removal order. Callers are expected to start from an {@code allSub} that
     * only contains results of this method (as {@link #jisuan()} does).
     *
     * @param parent the itemset to expand; stored by reference, not copied
     */
    public void genSub(Set<String> parent) {
        if (parent.isEmpty() || !allSub.add(parent)) {
            return;
        }
        for (String s : parent) {
            Set<String> smaller = new HashSet<String>(parent);
            smaller.remove(s);
            genSub(smaller);
        }
    }

    /**
     * Prints each itemset on its own line.
     *
     * @param sss the itemsets to print
     */
    public void printSetSetString(Set<Set<String>> sss) {
        for (Set<String> ss : sss) {
            System.out.println(ss);
        }
    }

    /**
     * Prints every rule {@code x => y} between disjoint members of
     * {@code subset} whose confidence is greater than {@code minconf}.
     *
     * @param subset the itemsets to pair up
     */
    public void releRuleCount(Set<Set<String>> subset) {
        for (Set<String> x : subset) {
            for (Set<String> y : subset) {
                Set<String> xy = new HashSet<String>();
                xy.addAll(x);
                xy.addAll(y);
                // Sizes add up only when x and y share no word.
                if (xy.size() == (x.size() + y.size())) {
                    double sup_count = cout_cand(x, y);
                    if (sup_count > minconf) {
                        System.out.println(x + "==>>" + y + "==" + sup_count);
                    }
                }
            }
        }
    }

    /**
     * Runs the whole analysis: seeds the candidates, alternates filtering and
     * growing until either step fails, then prints the high-confidence rules
     * among the subsets of each itemset in the last frequent round.
     */
    public void jisuan() {
        item1_init();
        while (true) {
            if (!candidateToFrequency()) {
                break;
            }
            if (!frequencyToCandidate()) {
                break;
            }
        }
        if (frequencyList.isEmpty()) {
            // Not even a single word reached the minimum support; nothing to report.
            return;
        }
        Set<Set<String>> maxFreqs = frequencyList.get(frequencyList.size() - 1);
        for (Set<String> maxFreq : maxFreqs) {
            allSub = new HashSet<Set<String>>();
            genSub(maxFreq);
            releRuleCount(allSub);
        }
    }

    public static void main(String[] args) {
        new AprioriMyImpl().jisuan();
    }
}

Java implementation of the association algorithm Apriori, the database uses Redis

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.