The data set looks like this:
1,1,1
2,1.5,1
3,0.5,1
3,5,-1
7,0.75,-1
7,4,2
8,5,2
8,5.5,2
Each record has three attributes: the x and y coordinates of a point in the plane, followed by the class the point belongs to (-1 marks an outlier). Plotted in a coordinate system, the data looks like this:
The source code is as follows:
// The original listing declared `package neugle.dbscan;`. Restore that line (above the
// imports) when this file is placed in the matching source directory.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

/**
 * DBSCAN density-based clustering of two-dimensional points read from a
 * comma-separated file with one {@code x,y,label} record per line.
 *
 * <p>A point is a <em>core</em> point when at least {@code minpts} <em>other</em> points lie
 * within distance {@code eps} of it (the point itself is not counted). Every core point
 * starts or extends a cluster; non-core points within {@code eps} of a core point join that
 * cluster as border points; everything else is reported as noise.
 *
 * <p>Seed points are chosen in random order, so the order of clusters (and which cluster a
 * border point shared by two clusters lands in) may vary between runs unless a seeded
 * {@link Random} is supplied.
 */
public class DBScan {

    /** All samples loaded so far, in the order they were read. */
    private final List<Point> pointlist = new ArrayList<>();

    /** Clusters produced by the most recent run. */
    private final List<List<Point>> clusterlist = new ArrayList<>();

    /** Points the most recent run could not attach to any cluster. */
    private final List<Point> noiselist = new ArrayList<>();

    /** Neighbourhood radius. */
    private final double eps;

    /** Minimum number of neighbours (excluding the point itself) that makes a point a core point. */
    private final int minpts;

    /** Source of randomness used to pick the order in which seed points are examined. */
    private final Random random;

    /** One sample: coordinates, the label read from the file, and the visited flag used while clustering. */
    class Point {
        public double x;
        public double y;
        public String point_type;
        public boolean isvisited = false;
    }

    /**
     * Creates a clusterer that examines seed points in an unpredictable random order.
     *
     * @param eps    neighbourhood radius
     * @param minpts minimum number of neighbours (the point itself excluded) for a core point
     */
    public DBScan(double eps, int minpts) {
        this(eps, minpts, new Random());
    }

    /**
     * Creates a clusterer with a caller-supplied random source, which makes runs reproducible.
     *
     * @param eps    neighbourhood radius
     * @param minpts minimum number of neighbours (the point itself excluded) for a core point
     * @param random random source used to order the seed points; must not be {@code null}
     * @throws NullPointerException if {@code random} is {@code null}
     */
    public DBScan(double eps, int minpts, Random random) {
        if (random == null) {
            throw new NullPointerException("random");
        }
        this.eps = eps;
        this.minpts = minpts;
        this.random = random;
    }

    /**
     * Reads {@code x,y,label} records from a file and appends them to the samples already loaded.
     *
     * <p>Reading is best effort: blank lines are ignored, malformed lines are reported on
     * {@code System.err} and skipped, and an I/O failure is reported on {@code System.err}
     * without throwing. Fields after the third are ignored.
     *
     * @param filePath path of the data file
     * @return the internal list of all samples loaded so far (not a copy)
     */
    public List<Point> readFile(String filePath) {
        // try-with-resources closes the reader even when opening or reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split(",");
                if (fields.length < 3) {
                    System.err.println(filePath + ":" + lineNumber
                            + ": expected x,y,label but got \"" + line + "\"");
                    continue;
                }
                try {
                    Point point = new Point();
                    point.x = Double.parseDouble(fields[0]);
                    point.y = Double.parseDouble(fields[1]);
                    point.point_type = fields[2];
                    this.pointlist.add(point);
                } catch (NumberFormatException e) {
                    System.err.println(filePath + ":" + lineNumber
                            + ": non-numeric coordinate in \"" + line + "\"");
                }
            }
        } catch (IOException e) {
            System.err.println("Could not read " + filePath + ": " + e);
        }
        return this.pointlist;
    }

    /**
     * Loads the given file and clusters every sample loaded so far.
     *
     * <p>Results of a previous run are discarded; samples loaded by earlier calls are kept
     * and clustered again together with the new ones.
     *
     * @param filePath path of the data file
     */
    public void dbscanfun(String filePath) {
        this.readFile(filePath);
        this.cluster();
    }

    /**
     * Returns the clusters found by the most recent run.
     *
     * @return a copy of the cluster lists; each inner list starts with the cluster's seed point
     */
    public List<List<Point>> getClusters() {
        List<List<Point>> copy = new ArrayList<>(this.clusterlist.size());
        for (List<Point> members : this.clusterlist) {
            copy.add(new ArrayList<>(members));
        }
        return copy;
    }

    /**
     * Returns the noise points found by the most recent run.
     *
     * @return a copy of the noise list
     */
    public List<Point> getNoise() {
        return new ArrayList<>(this.noiselist);
    }

    /** Runs DBSCAN over all loaded samples, replacing any earlier result. */
    private void cluster() {
        this.clusterlist.clear();
        this.noiselist.clear();
        for (Point point : this.pointlist) {
            point.isvisited = false;
        }

        // Point does not override equals/hashCode, so this set compares by identity.
        Set<Point> clustered = new HashSet<>();
        List<Point> noiseCandidates = new ArrayList<>();

        // Walking a shuffled copy and skipping visited points picks each next seed uniformly
        // among the still-unvisited points, without rebuilding an "unvisited" list every time.
        List<Point> order = new ArrayList<>(this.pointlist);
        Collections.shuffle(order, this.random);

        for (Point seed : order) {
            if (seed.isvisited) {
                continue;
            }
            seed.isvisited = true;
            List<Point> neighbors = this.getNeighbors(seed);
            if (neighbors.size() < this.minpts) {
                // Provisional only: the point may turn out to be a border point of a later cluster.
                noiseCandidates.add(seed);
            } else {
                this.clusterlist.add(this.expandCluster(seed, neighbors, clustered));
            }
        }

        for (Point candidate : noiseCandidates) {
            if (!clustered.contains(candidate)) {
                this.noiselist.add(candidate);
            }
        }
    }

    /**
     * Grows one cluster outwards from a core point.
     *
     * @param seed          the core point that starts the cluster
     * @param seedNeighbors the neighbours of {@code seed}
     * @param clustered     points already assigned to any cluster; updated in place
     * @return the members of the new cluster, seed first, then in order of discovery
     */
    private List<Point> expandCluster(Point seed, List<Point> seedNeighbors, Set<Point> clustered) {
        List<Point> members = new ArrayList<>();
        members.add(seed);
        clustered.add(seed);

        Deque<Point> frontier = new ArrayDeque<>(seedNeighbors);
        while (!frontier.isEmpty()) {
            Point candidate = frontier.poll();
            if (!candidate.isvisited) {
                candidate.isvisited = true;
                List<Point> reachable = this.getNeighbors(candidate);
                if (reachable.size() >= this.minpts) {
                    // candidate is itself a core point, so its neighbours are density-reachable too.
                    frontier.addAll(reachable);
                }
            }
            // Also picks up points visited earlier and provisionally marked as noise;
            // points that already belong to another cluster are left where they are.
            if (clustered.add(candidate)) {
                members.add(candidate);
            }
        }
        return members;
    }

    /**
     * Collects every other loaded point within {@code eps} of the given point.
     *
     * @param center the point whose neighbourhood is wanted
     * @return the neighbours of {@code center}, never including {@code center} itself
     */
    private List<Point> getNeighbors(Point center) {
        List<Point> neighbors = new ArrayList<>();
        for (Point other : this.pointlist) {
            // Exclude by identity rather than by distance 0, so duplicate samples still count.
            if (other != center && this.distanceCalculate(other, center) <= this.eps) {
                neighbors.add(other);
            }
        }
        return neighbors;
    }

    /** Euclidean distance between two points. */
    private double distanceCalculate(Point first, Point second) {
        double dx = first.x - second.x;
        double dy = first.y - second.y;
        return Math.sqrt(dx * dx + dy * dy);
    }

    /** Prints every cluster (members, then member count) followed by the noise points to standard output. */
    public void print() {
        System.out.println("Poly to " + this.clusterlist.size() + " class");
        for (List<Point> members : this.clusterlist) {
            System.out.println("------------");
            for (Point point : members) {
                System.out.println(point.x + " " + point.y + " " + point.point_type);
            }
            System.out.println(members.size());
            System.out.println("------------");
        }
        System.out.println("Noise Point has " + this.noiselist.size() + " X");
        System.out.println("------------");
        for (Point point : this.noiselist) {
            System.out.println(point.x + " " + point.y + " " + point.point_type);
        }
        System.out.println("------------");
    }

    /**
     * Clusters a data file with {@code eps = 2.5} and {@code minpts = 2} and prints the result.
     *
     * @param args optional: the data file path as the first argument; the original
     *             hard-coded path is used when no argument is given
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "D:\\data\\dbscan\\test.data";
        DBScan scan = new DBScan(2.5, 2);
        scan.dbscanfun(path);
        scan.print();
    }
}
The experimental results are as follows:
Poly 2 class
------------
8.0 5.5 2
7.0 4.0 2
8.0 5.0 2
------------
------------
3.0 0.5 1
1.0 1.0 1
2.0 1.5 1
------------
Noise point has 2
------------
3.0 5.0 -1
7.0 0.75 -1
------------
Data Mining Clustering Algorithms: DBSCAN