First, a brief description of what the algorithm does.
In the figure, the right side shows the embedded subtrees (ES) of the tree on the left. The string `A B C -1 D -1 -1 E -1 -1` is the pre-order string of the tree on the left, and it is also the input data format of the algorithm: tokens are separated by a single space, and `-1` means "go back up one level" (return to the parent node).
This algorithm is mainly used to mine frequent subtrees, that is, to find all the frequent subtrees in a tree. I will not explain the principle here; you can look up the paper, which was written by Teacher Zou. Here I mainly implement it in Java: first I set up entity classes for GrowthElement, PrefixTree, PrefixTreeNode and ProjectedDB, and then I implement the search for embedded subtrees (ES) recursively.
Package Iie.ucas.treeminer.bean;public class Growthelement {private String label = "";p rivate int attached = 0;//attached means the GEs is attached to theprivate int times = 0;//number of occurrences, compared with min_sup//n-th node of the Prefix-tree.public Stri Ng Getlabel () {return label;} public int Gettimes () {return times;} public void settimes (int times) {this.times = times;} public void SetLabel (String label) {this.label = label;} public int getattached () {return attached;} public void setattached (int attached) {this.attached = attached;} Public String toString () {return "(" + label + "," + getattached () + "):" + Gettimes ();}} <pre name= "code" class= "Java" >package Iie.ucas.treeminer.bean;public class Prefixtree {private Projecteddb Prodb ;p rivate prefixtreenode[] ptnodes;private int length = 0;private int times = 1;public prefixtree (int n) {ptnodes = new Pre fixtreenode[n];//the number of prefix of the frequent pattern <a C-1 -1> n is 2for (int i = 0; i < n; i++) {ptnodes[i] = new Prefixtreenode ();} length = n;} Public PRefixtreenode getptnode (int index) {if (Index < length) return Ptnodes[index];elsereturn null;} public int Gettimes () {return times;} public void settimes (int times) {this.times = times;} public void Setptnode (int index, int numminus, String label, int pos) {if (Index < length) {Ptnodes[index].setnumminus ( Numminus);p Tnodes[index].setlabel (label);p Tnodes[index].setpos (POS);}} Public String toString () {String res = ""; for (int i = 0; i < length; i++) {if (i = = 0) Res + = Ptnodes[i].getlabel (); else {res + = "" + Ptnodes[i].getlabel ();}} return res;} Public String tolastlabelstring () {String res = "", if (length = = 1) return Ptnodes[0].getlabel (); for (int i = 0; i < length; i++) {if (i = = 0) {res + = Ptnodes[i].getlabel (); for (int j = 0; J < Ptnodes[i].getnumminus (); + j) {res + = "" + "-1";} } else if (i = = length-1) {res + = "" + Ptnodes[i].getlabel ();} else {res + = "" + Ptnodes[i].getlabel (); for 
(int j = 0; J < Ptnodes[i].getnumminus (); J + +) {res + = "" + "-1";}}} return res;} PublIC String toString (Boolean iscomplete) {String res = ""; for (int i = 0; i < length; i++) {if (i = = 0) {res + = Ptnodes[i].getlabel (); for (int j = 0; J < Ptnodes[i].getnumminus (); + j) {res + = "" + "-1";} } else {res + = "" + Ptnodes[i].getlabel (); for (int j = 0; J < Ptnodes[i].getnumminus (); J + +) {res + = "" + "-1";}}} return res;} public int GetLength () {return length;} public void setLength (int length) {this.length = length;} Public Projecteddb Getprodb () {return prodb;} public void Setprodb (Projecteddb prodb) {this.prodb = Prodb;} Public prefixtreenode[] Getptnode () {return ptnodes;} public void Setptnode (prefixtreenode[] ptnodes) {this.ptnodes = Ptnodes;}}
Package Iie.ucas.treeminer.bean;public class Prefixtreenode {private String label = "";p rivate int pos = 0;private int
numminus = 0;//is used to represent the number of 1 in front of the next label public int Getnumminus () {return numminus;} public void Setnumminus (int numminus) {this.numminus = Numminus;} Public String Getlabel () {return label;} public void SetLabel (String label) {this.label = label;} public int GetPos () {return pos;} public void SetPos (int pos) {this.pos = pos;}}
package iie.ucas.treeminer.bean;

import java.util.ArrayList;

/**
 * A projected database: a list of pre-order strings, one tree per entry.
 */
public class ProjectedDB {

    /**
     * Used to store the projected database, e.g.
     * {@code {A B C -1 -1 -1; A B -1 D -1 -1 -1}}.
     */
    private ArrayList<String> pre_order_list;

    /** Creates a projected database with an empty list. */
    public ProjectedDB() {
        pre_order_list = new ArrayList<String>();
    }

    /** Returns the backing list itself (not a copy). */
    public ArrayList<String> getPre_order_list() {
        return pre_order_list;
    }

    /** Replaces the backing list with {@code preOrderList}. */
    public void setPre_order_list(ArrayList<String> preOrderList) {
        pre_order_list = preOrderList;
    }
}
package iie.ucas.treeminer.action;

import iie.ucas.treeminer.bean.GrowthElement;
import iie.ucas.treeminer.bean.PrefixTree;
import iie.ucas.treeminer.bean.ProjectedDB;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map.Entry;

/**
 * Driver for the recursive search: reads pre-order strings, builds one
 * length-1 prefix tree per distinct label, and grows each prefix by the
 * growth elements found in its projected database.
 *
 * Input lines are space-separated tokens where "-1" means "go back".
 */
public class PrefixTreeESpanAction {

    /** Token that marks "go back" in a pre-order string. */
    private static final String BACK = "-1";

    /** Data file used by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_DATA_PATH =
            "C:\\users\\fernando\\desktop\\treedata\\cslog.data";

    /**
     * Reads data by line and stores it in an ArrayList.
     *
     * @param path path of the file to read
     * @return the lines of the file, in order
     * @throws IOException if the file cannot be opened or read
     */
    public ArrayList<String> file2ArrList(String path) throws IOException {
        ArrayList<String> arrList = new ArrayList<String>();
        File dataFile = new File(path);
        BufferedReader input = new BufferedReader(
                new InputStreamReader(new FileInputStream(dataFile), "Utf-8"));
        try {
            String line;
            while ((line = input.readLine()) != null) {
                arrList.add(line);
            }
        } finally {
            // Close the reader even when readLine() throws.
            input.close();
        }
        return arrList;
    }

    /**
     * Counts the number of occurrences of each label in {@code arrList}.
     * "-1" tokens are not counted.
     *
     * @param arrList pre-order strings, one per entry
     * @return map from label to its number of occurrences
     */
    public HashMap<String, Integer> getAllLabel(ArrayList<String> arrList) {
        HashMap<String, Integer> hashMap = new HashMap<String, Integer>();
        for (int i = 0; i < arrList.size(); i++) {
            String[] labels = arrList.get(i).split(" "); // all tokens of line i
            for (int j = 0; j < labels.length; j++) {
                if (labels[j].equals(BACK)) {
                    continue; // keep real labels only
                }
                Integer seen = hashMap.get(labels[j]);
                hashMap.put(labels[j], seen == null ? 1 : seen + 1);
            }
        }
        return hashMap;
    }

    /**
     * Builds the projected database that corresponds to {@code preTree}.
     *
     * For every line of {@code proDB}, each position (starting at index
     * {@code preTree.getLength() - 1}) whose token equals the label of the
     * last prefix node produces one output row: that label followed by the
     * tokens after it, up to and including the "-1" at which the number of
     * "-1" tokens reaches the number of labels collected so far.
     *
     * Progress is printed to standard output.
     *
     * @param proDB   the database to project
     * @param preTree the prefix tree, e.g. {@code <A -1>}
     * @return a new projected database holding the collected rows
     */
    public ProjectedDB getProjectedDB(ProjectedDB proDB, PrefixTree preTree) {
        ProjectedDB resProDB = new ProjectedDB();
        ArrayList<String> preTList = new ArrayList<String>();
        String label = preTree.getPTNode()[preTree.getLength() - 1].getLabel();
        System.out.println("<" + preTree.toString(true) + ">-PROJECTEDDB");

        ArrayList<String> proDBList = proDB.getPre_order_list();
        for (int i = 0; i < proDBList.size(); i++) {
            String[] labels = proDBList.get(i).split(" ");
            int length = labels.length;
            for (int k = preTree.getLength() - 1; k < length; k++) {
                System.out.println("lastlabelstring:" + label);
                if (!label.equals(labels[k])) {
                    continue;
                }
                // Found an occurrence of the label: construct one row.
                StringBuilder row = new StringBuilder(label);
                int contLabel = 1; // labels collected so far, the root included
                int minus = 0;     // "-1" tokens collected so far
                for (int j = k + 1; j < length; j++) {
                    row.append(" ").append(labels[j]);
                    if (!labels[j].equals(BACK)) {
                        contLabel++;
                    } else {
                        minus++;
                        if (minus >= contLabel) {
                            break; // every collected label has been closed
                        }
                    }
                }
                System.out.println("prodbelement:" + row);
                preTList.add(row.toString()); // one row of the projected database
            }
        }
        resProDB.setPre_order_list(preTList);
        return resProDB;
    }

    /**
     * Returns the growth elements of {@code preTree} found in {@code proDB}.
     *
     * Every non-"-1" token at index {@code preTree.getLength()} or later is
     * counted. Tokens with the same label share one element whose
     * {@code times} is incremented.
     *
     * @param proDB   the projected database to scan
     * @param preTree the prefix tree being grown
     * @return the set of growth elements with their counts
     */
    public HashSet<GrowthElement> getGEs(ProjectedDB proDB, PrefixTree preTree) {
        ArrayList<String> arrProDB = proDB.getPre_order_list();
        HashSet<GrowthElement> geSet = new HashSet<GrowthElement>();
        int preLength = preTree.getLength();
        for (int i = 0; i < arrProDB.size(); i++) {
            String[] labels = arrProDB.get(i).split(" ");
            for (int j = preLength; j < labels.length; j++) {
                // NOTE(review): both counters are re-initialised for every
                // token, so contLabel >= minus always holds below and every GE
                // is attached to node preLength. If the counters were meant to
                // run across the whole line they belong outside this loop --
                // confirm against the paper before changing.
                int contLabel = 0;
                int minus = 0;
                if (labels[j].equals(BACK)) {
                    minus++;
                    continue;
                }
                GrowthElement known = findGrowthElement(geSet, labels[j], preLength);
                if (known != null) {
                    // This GE was found before: just count it again.
                    known.setTimes(known.getTimes() + 1);
                } else {
                    GrowthElement ge = new GrowthElement();
                    ge.setAttached(contLabel >= minus
                            ? preLength
                            : preLength + (contLabel - minus));
                    ge.setLabel(labels[j]);
                    ge.setTimes(1);
                    geSet.add(ge);
                }
                contLabel++;
            }
        }
        return geSet;
    }

    /** Returns the element of {@code geSet} with this label and attachment, or null. */
    private static GrowthElement findGrowthElement(HashSet<GrowthElement> geSet,
            String label, int attached) {
        Iterator<GrowthElement> it = geSet.iterator();
        while (it.hasNext()) {
            GrowthElement candidate = it.next();
            if (candidate.getLabel().equals(label)
                    && candidate.getAttached() == attached) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Extends {@code preTree} by one node labelled with {@code ge}.
     *
     * The resulting prefix is printed to standard error.
     *
     * @param ge      the growth element to append
     * @param preTree the prefix tree to extend (not modified)
     * @param min_sup minimum support
     * @return the extended prefix tree, or {@code null} when
     *         {@code ge.getTimes()} is below {@code min_sup}
     */
    public PrefixTree extendsByGE(GrowthElement ge, PrefixTree preTree, int min_sup) {
        if (ge.getTimes() < min_sup) {
            return null; // not frequent enough
        }
        int length = preTree.getLength();
        PrefixTree resPreT = new PrefixTree(length + 1);
        if (1 == length) {
            resPreT.setPTNode(0, 0, preTree.getPTNode(0).getLabel(), 1);
            resPreT.setPTNode(1, 2, ge.getLabel(), 2);
        } else {
            int attached = ge.getAttached();
            if (attached == length) {
                // Copy the previous information over, e.g. {A B -1 -1} with
                // GE (C,2) becomes {A B C -1 -1 -1}.
                for (int i = 0; i < length; i++) {
                    resPreT.setPTNode(i, 0, preTree.getPTNode(i).getLabel(), i + 1);
                }
                // Set the information of the newly added GE.
                resPreT.setPTNode(length, length + 1, ge.getLabel(), length + 1);
            } else {
                // Copy all nodes but the last one unchanged.
                for (int i = 0; i < length - 1; i++) {
                    resPreT.setPTNode(i,
                            preTree.getPTNode(i).getNumMinus(),
                            preTree.getPTNode(i).getLabel(),
                            preTree.getPTNode(i).getPos());
                }
                // The last old node gets (length - attached) "-1" tokens.
                resPreT.setPTNode(length - 1, (length - attached),
                        preTree.getPTNode(length - 1).getLabel(),
                        preTree.getPTNode(length - 1).getPos());
                // Set the information of the newly added GE.
                resPreT.setPTNode(length, attached + 1, ge.getLabel(), length + 1);
            }
        }
        System.err.println("prefix after adding GE:" + resPreT.toString(true));
        return resPreT;
    }

    /**
     * Recursively grows {@code preTree}: finds its growth elements in
     * {@code proDB}, extends the prefix by each one that meets
     * {@code min_sup}, projects the database onto the extended prefix, and
     * recurses.
     *
     * Each growth element is extended exactly once, and elements below
     * {@code min_sup} are skipped instead of being passed on as {@code null}.
     *
     * @param preTree the current prefix; a {@code null} prefix ends the recursion
     * @param proDB   the projected database of {@code preTree}
     * @param min_sup minimum support
     */
    public void Fre(PrefixTree preTree, ProjectedDB proDB, int min_sup) {
        if (preTree == null || proDB == null) {
            return;
        }
        HashSet<GrowthElement> geSet = this.getGEs(proDB, preTree);
        Iterator<GrowthElement> it = geSet.iterator();
        while (it.hasNext()) {
            GrowthElement ge = it.next();
            System.out.println("GE:" + ge.toString());
            PrefixTree extended = this.extendsByGE(ge, preTree, min_sup);
            if (extended == null) {
                continue; // infrequent growth element: nothing to grow
            }
            Fre(extended, this.getProjectedDB(proDB, extended), min_sup);
        }
    }

    /**
     * Runs the miner.
     *
     * @param args optional: {@code args[0]} is the data file (defaults to the
     *             built-in path), {@code args[1]} is min_sup (defaults to 1)
     * @throws IOException if the data file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String dataPath = args.length > 0 ? args[0] : DEFAULT_DATA_PATH;
        int min_sup = args.length > 1 ? Integer.parseInt(args[1]) : 1;

        PrefixTreeESpanAction ptes = new PrefixTreeESpanAction();
        ArrayList<String> arrList = ptes.file2ArrList(dataPath); // raw data
        HashMap<String, Integer> all_label = ptes.getAllLabel(arrList);
        System.out.println("All label Quantity:" + all_label.size());

        ProjectedDB baseDB = new ProjectedDB();
        baseDB.setPre_order_list(arrList); // the first, original projected database

        // One prefix tree of length 1 per distinct label: {A -1}, {B -1}, ...
        PrefixTree[] preTrees = new PrefixTree[all_label.size()];
        int index = 0;
        for (Entry<String, Integer> entry : all_label.entrySet()) {
            PrefixTree tree = new PrefixTree(1);
            tree.setLength(1);
            tree.getPTNode(0).setLabel(entry.getKey());
            tree.getPTNode(0).setPos(1);
            tree.getPTNode(0).setNumMinus(1);
            preTrees[index++] = tree;
        }

        for (int i = 0; i < preTrees.length; i++) {
            ProjectedDB ptdb = ptes.getProjectedDB(baseDB, preTrees[i]);
            ptes.Fre(preTrees[i], ptdb, min_sup);
        }
    }
}
PrefixTreeESpan frequent subtree pattern mining: a pattern-growth algorithm for mining embedded subtrees.