Description (from Baidu Encyclopedia)
Algorithm description:
(1) Pick an object p in the database that has not yet been examined. If p has not been processed (neither assigned to a cluster nor marked as noise), check its neighborhood; if the neighborhood contains no fewer than MinPts objects, create a new cluster C and add all of the points in the neighborhood to the candidate set N.
(2) For every object q in the candidate set N that has not yet been processed, check its neighborhood; if it contains at least MinPts objects, add those objects to N. If q does not yet belong to any cluster, add q to C.
(3) Repeat step 2), continuing to check the unprocessed objects in N, until the current candidate set N is empty.
(4) Repeat steps 1) to 3) until every object has been assigned to a cluster or marked as noise.

Pseudo code:

Input:  data object set D, radius Eps, density threshold MinPts
Output: clusters C

DBSCAN(D, Eps, MinPts)
begin
    init C = 0;                                // the number of clusters is initialized to 0
    for each unvisited point p in D
        mark p as visited;                     // mark p as visited
        N = getNeighbours(p, Eps);
        if sizeOf(N) < MinPts then
            mark p as Noise;                   // mark p as noise if sizeOf(N) < MinPts
        else
            C = next cluster;                  // create a new cluster C
            ExpandCluster(p, N, C, Eps, MinPts);
        end if
    end for
end

where the ExpandCluster pseudo code is as follows:

ExpandCluster(p, N, C, Eps, MinPts)
    add p to cluster C;                        // first add the core point to C
    for each point p' in N
        mark p' as visited;
        N' = getNeighbours(p', Eps);           // radius check for every point in the neighborhood N
        if sizeOf(N') >= MinPts then
            N = N + N';                        // if there are at least MinPts neighbours, expand N
        end if
        if p' is not a member of any cluster
            add p' to cluster C;               // add p' to cluster C
        end if
    end for
end ExpandCluster
Java implementation of DBSCAN (reposted from http://www.cnblogs.com/zhangchaoyang/articles/2182748.html):
package orisun;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Vector;

/**
 * DBSCAN clustering over a list of {@code DataObject}s.
 *
 * <p>Each object carries a cluster id ("cid"): values {@code <= 0} mean "not assigned to a
 * cluster" (0 is presumably the initial value, -1 is written here to mark noise) and positive
 * values identify the cluster the object belongs to.
 *
 * <p>NOTE(review): this listing was restored from a copy whose keyword and identifier casing had
 * been mangled. Confirm the exact spelling of the members used on {@code Global},
 * {@code DataObject} and {@code DataSource}, and of the {@code minPts} field, against their
 * declarations.
 */
public class DBScan {

    /** Neighborhood radius. */
    double eps = 3;

    /** Density threshold: minimum neighborhood size for a point to count as a core point. */
    int minPts = 4;

    /**
     * Collects every object whose distance to {@code p} is at most {@link #eps}.
     *
     * <p>Per the original author's note, the distance from an object to itself is 0, so
     * {@code p} is its own neighbor whenever it is contained in {@code objects}.
     *
     * @param p       the object whose neighborhood is wanted
     * @param objects all objects to scan
     * @return the objects of {@code objects} within {@code eps} of {@code p}, in list order
     */
    public Vector<DataObject> getNeighbors(DataObject p, ArrayList<DataObject> objects) {
        Vector<DataObject> neighbors = new Vector<DataObject>();
        if (objects.isEmpty()) {
            return neighbors;
        }
        // p's vector and its length do not change during the scan, so fetch them once.
        double[] origin = p.getVector();
        int len = origin.length;
        for (DataObject q : objects) {
            // Edit distance is the metric in use. The original listing also mentioned
            // Global.calEuraDist (Euclidean), Global.calCityBlockDist (city block) and
            // Global.calSinDist (sine of the angle between the vectors) as alternatives.
            if (Global.calEditDist(origin, q.getVector(), len) <= eps) {
                neighbors.add(q);
            }
        }
        return neighbors;
    }

    /**
     * Runs DBSCAN over {@code objects}, writing a cluster id into every object.
     *
     * <p>A single pass is enough: every object that is still unvisited when the pass reaches it
     * is marked visited there. The previous implementation wrapped the pass in a
     * {@code while (!allVisited)} loop whose flag was only set after processing an unvisited
     * object, so an empty list (or a list of already-visited objects) never terminated.
     *
     * @param objects the objects to cluster; their visited flag and cid are modified
     * @return the number of clusters created
     */
    public int dbscan(ArrayList<DataObject> objects) {
        int clusterId = 0;
        for (DataObject p : objects) {
            if (p.isVisited()) {
                continue;
            }
            // Once visited, p has been decided to be either a core point or a border/noise point.
            p.setVisited(true);
            Vector<DataObject> neighbors = getNeighbors(p, objects);
            if (neighbors.size() < minPts) {
                // Not a core point. Keep an existing positive cid (border point of a cluster);
                // otherwise mark it as noise with -1.
                if (p.getCid() <= 0) {
                    p.setCid(-1);
                }
            } else if (p.getCid() <= 0) {
                // Unassigned core point: it seeds a new cluster.
                clusterId++;
                expandCluster(p, neighbors, clusterId, objects);
            } else {
                // Core point that was already labelled by an earlier expansion:
                // keep growing that same cluster.
                expandCluster(p, neighbors, p.getCid(), objects);
            }
        }
        return clusterId;
    }

    /**
     * Assigns {@code p} and its neighborhood to cluster {@code clusterId}.
     *
     * <p>For each not-yet-visited neighbor {@code q} that is itself a core point, the unassigned
     * members of {@code q}'s neighborhood are labelled as well. Those second-level points are
     * not marked visited here; {@link #dbscan} reaches them later and, finding a positive cid,
     * continues the expansion with that cid.
     *
     * @param p         the core point being expanded
     * @param neighbors the neighborhood of {@code p}
     * @param clusterId the cluster id to assign
     * @param objects   all objects, needed for further neighborhood queries
     */
    private void expandCluster(DataObject p, Vector<DataObject> neighbors, int clusterId,
            ArrayList<DataObject> objects) {
        p.setCid(clusterId);
        for (DataObject q : neighbors) {
            if (!q.isVisited()) {
                q.setVisited(true);
                Vector<DataObject> qNeighbors = getNeighbors(q, objects);
                if (qNeighbors.size() >= minPts) {
                    for (DataObject reachable : qNeighbors) {
                        if (reachable.getCid() <= 0) {
                            reachable.setCid(clusterId);
                        }
                    }
                }
            }
            // q is not a member of any cluster yet (unassigned or previously marked noise).
            if (q.getCid() <= 0) {
                q.setCid(clusterId);
            }
        }
    }

    /**
     * Demo entry point: loads a matrix and its row labels, clusters them and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DataSource datasource = new DataSource();
        // This data set is meant to be run with eps=3, minPts=4.
        datasource.readMatrix(new File("/home/orisun/test/dot.mat"));
        datasource.readRLabel(new File("/home/orisun/test/dot.rlabel"));
        // Alternative data set from the original listing, meant for eps=2.5, minPts=4:
        //   /home/orisun/text.normalized.mat with labels /home/orisun/text.rlabel
        DBScan ds = new DBScan();
        int clunum = ds.dbscan(datasource.objects);
        datasource.printResult(datasource.objects, clunum);
    }
}