From:https://www.cnblogs.com/wsine/p/5180778.html
Operating Environment
- Python 3
- NumPy (Scientific Computing package)
- Matplotlib (required only for plotting; not needed if you skip the plot)
Calculation process
st=>start: Start
e=>end: End
op1=>operation: Read data
cond=>condition: Are there unclassified points left?
op2=>operation: Find an unclassified point and expand from it
op3=>operation: Output
st->op1->op2->cond
cond(yes)->op2
cond(no)->op3->e
Input sample
/* 788points.txt */
15.55,28.65
14.9,27.55
14.45,28.35
14.15,28.8
13.75,28.05
13.35,28.45
13,29.15
13.45,27.5
13.6,26.5
12.8,27.35
12.4,27.85
12.3,28.4
12.2,28.65
13.4,25.1
12.95,25.95
788points.txt full file: Download
Code implementation
# -*- coding: utf-8 -*-
"""DBSCAN density-based clustering demo.

Reads 2-D points from a text file, clusters them with DBSCAN and, when run
as a script, plots every cluster in its own colour.

Data layout used throughout: ``data`` is a 2-D array of shape
(n_features, n_points), i.e. one point per *column*.
"""
__author__ = 'wsine'

import math
import time
from collections import deque

import numpy as np

# Matplotlib is only needed for plotting, so it is imported lazily inside
# plotfeature() and the script entry point; the clustering functions work
# without it.

# Label sentinels.  UNCLASSIFIED must not compare equal to NOISE: the earlier
# pair (False / 0) did (False == 0 in Python), which made noise points look
# unvisited and caused them to be re-expanded.
UNCLASSIFIED = None
NOISE = 0


def loaddataset(filename, splitchar='\t'):
    """Read a delimited text file of numbers into a list of rows.

    Input:  filename  -- path of the file to read
            splitchar -- field separator used on each line
    Output: list of rows, each row a list of floats
    Blank lines are skipped instead of raising ValueError on float('').
    """
    dataset = []
    with open(filename, encoding='utf-8') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                continue
            dataset.append([float(field) for field in line.split(splitchar)])
    return dataset


def dist(a, b):
    """Return the Euclidean distance between vectors ``a`` and ``b``.

    Input:  vector a, vector b (any array-like of equal shape)
    Output: Euclidean distance as a Python float
    """
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return math.sqrt(float(np.sum(diff ** 2)))


def eps_neighbor(a, b, eps):
    """Return True when ``a`` and ``b`` are strictly closer than ``eps``.

    Input:  vector a, vector b, radius eps
    Output: whether b lies within the eps range of a
    """
    return dist(a, b) < eps


def region_query(data, pointid, eps):
    """Return the ids of all points strictly within ``eps`` of one point.

    Input:  data set (n_features x n_points), id of the query point, radius
    Output: list of point ids (ascending) inside the eps range; the query
            point itself is included because its distance is 0.
    """
    points = np.asarray(data, dtype=float)
    # One vectorised pass over all columns instead of a Python-level loop.
    offsets = points - points[:, [pointid]]
    distances = np.sqrt((offsets ** 2).sum(axis=0))
    return np.flatnonzero(distances < eps).tolist()


def expand_cluster(data, clusterresult, pointid, clusterid, eps, minpts):
    """Try to grow cluster ``clusterid`` starting from point ``pointid``.

    Input:  data set, label list (modified in place), id of the point to
            classify, cluster id, radius, minimum number of points
    Output: True if a cluster was formed, False if the point was marked as
            noise (fewer than ``minpts`` neighbours).
    """
    seeds = region_query(data, pointid, eps)
    if len(seeds) < minpts:
        # Not a core point: provisionally noise (may be absorbed later).
        clusterresult[pointid] = NOISE
        return False

    # The start point and its whole neighbourhood join the cluster.
    clusterresult[pointid] = clusterid
    for seedid in seeds:
        clusterresult[seedid] = clusterid

    # Breadth-first expansion; deque gives O(1) removal from the front.
    pending = deque(seeds)
    while pending:
        currentpoint = pending.popleft()
        queryresults = region_query(data, currentpoint, eps)
        if len(queryresults) < minpts:
            continue  # border point: belongs to the cluster, does not spread
        for resultpoint in queryresults:
            if clusterresult[resultpoint] is UNCLASSIFIED:
                pending.append(resultpoint)
                clusterresult[resultpoint] = clusterid
            elif clusterresult[resultpoint] == NOISE:
                # Known non-core point: relabel it, no need to query again.
                clusterresult[resultpoint] = clusterid
    return True


def dbscan(data, eps, minpts):
    """Cluster the columns of ``data`` with DBSCAN.

    Input:  data set (n_features x n_points), radius, minimum point count
    Output: (labels, number_of_clusters); labels[i] is NOISE (0) or a
            cluster id in 1..number_of_clusters.
    """
    points = np.asarray(data, dtype=float)
    npoints = points.shape[1]
    clusterresult = [UNCLASSIFIED] * npoints
    clusterid = 1
    for pointid in range(npoints):
        if clusterresult[pointid] is not UNCLASSIFIED:
            continue
        if expand_cluster(points, clusterresult, pointid, clusterid,
                          eps, minpts):
            clusterid += 1
    return clusterresult, clusterid - 1


def plotfeature(data, clusters, clusternum):
    """Scatter-plot the first two features, one colour per label.

    Input:  data set (n_features x n_points), label list, cluster count
    Label 0 (noise) is drawn in the first colour; colours repeat when there
    are more labels than colours.  The figure is not shown here; the caller
    is expected to call ``plt.show()``.
    """
    import matplotlib.pyplot as plt  # lazy: only plotting needs Matplotlib

    points = np.asarray(data, dtype=float)
    labels = np.asarray(clusters)
    scattercolors = ['black', 'blue', 'green', 'yellow', 'red', 'purple',
                     'orange', 'brown']
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for label in range(clusternum + 1):
        colorstyle = scattercolors[label % len(scattercolors)]
        subcluster = points[:, labels == label]
        ax.scatter(subcluster[0], subcluster[1], c=colorstyle, s=50)


def main():
    """Load 788points.txt, cluster it and draw the result."""
    dataset = loaddataset('788points.txt', splitchar=',')
    dataset = np.asarray(dataset, dtype=float).transpose()
    # NOTE(review): the scraped listing shows the garbled token `page` as
    # the minpts argument; 15 is believed to be the article's original
    # value -- confirm against the source article.
    clusters, clusternum = dbscan(dataset, 2, 15)
    print("Cluster Numbers =", clusternum)
    plotfeature(dataset, clusters, clusternum)


if __name__ == '__main__':
    import matplotlib.pyplot as plt

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()
    main()
    end = time.perf_counter()
    print('Finish all in %s' % str(end - start))
    plt.show()
Output sample
Cluster Numbers = 7
Finish all in 32.712135628590794
Python implementation of DBSCAN