Over the past few days I implemented the DBSCAN clustering algorithm in C++ for work. Its time complexity is O(n^2), and most of that time is spent computing the neighborhood (the points within the given radius) of every data point. The algorithm itself is very simple; I am sharing the code here for reference and welcome discussion.
The data point type is declared as follows:
The code is as follows:
#include <vector>
using namespace std;
// Number of dimensions of every data point; a global compile-time constant.
const int DIME_NUM = 2;
// Data point type
Class DataPoint
{
Private:
Unsigned long dpID; // data point ID
Double dimension [DIME_NUM]; // dimension Data
Long clusterId; // cluster ID
Bool isKey; // whether it is a core object
Bool visited; // whether it has been accessed
Vector <unsigned long> arrivalPoints; // list of domain data point IDS
Public:
DataPoint (); // default constructor
DataPoint (unsigned long dpID, double * dimension, bool isKey); // Constructor
Unsigned long GetDpId (); // GetDpId Method
Void SetDpId (unsigned long dpID); // SetDpId Method
Double * GetDimension (); // GetDimension Method
Void SetDimension (double * dimension); // SetDimension Method
Bool IsKey (); // GetIsKey Method
Void SetKey (bool isKey); // SetKey Method
Bool isVisited (); // GetIsVisited Method
Void SetVisited (bool visited); // SetIsVisited Method
Long GetClusterId (); // GetClusterId Method
Void SetClusterId (long classId); // SetClusterId Method
Vector <unsigned long> & GetArrivalPoints (); // GetArrivalPoints Method
};
This is the implementation. The code is as follows:
#include "DataPoint.h"
// Default constructor.
// Gives every member a defined value so that a default-constructed point can
// be copied (e.g. pushed into a vector) before its setters are called:
// ID 0, all coordinates 0, cluster ID -1, not a core object, not visited.
DataPoint::DataPoint() : dpID(0), clusterId(-1), isKey(false), visited(false)
{
    for (int i = 0; i < DIME_NUM; i++)
    {
        this->dimension[i] = 0.0;
    }
}
// Constructor.
// Parameters:
//   dpID      - ID of the data point
//   dimension - array holding at least DIME_NUM coordinate values; the
//               values are copied, the pointer is not retained
//   isKey     - initial value of the core-object flag
// The cluster ID starts at -1 and the visited flag at false. Members are
// initialized in their declaration order.
DataPoint::DataPoint(unsigned long dpID, double *dimension, bool isKey)
    : dpID(dpID), clusterId(-1), isKey(isKey), visited(false)
{
    // Copy the coordinate of each dimension
    for (int i = 0; i < DIME_NUM; i++)
    {
        this->dimension[i] = dimension[i];
    }
}
// Set Dimension Data
Void DataPoint: SetDimension (double * dimension)
{
For (int I = 0; I <DIME_NUM; I ++)
{
This-> dimension [I] = dimension [I];
}
}
// Obtain Dimension Data
Double * DataPoint: GetDimension ()
{
Return this-> dimension;
}
// Obtain whether the object is the core object
Bool DataPoint: IsKey ()
{
Return this-> isKey;
}
// Set the core object flag
Void DataPoint: SetKey (bool isKey)
{
This-> isKey = isKey;
}
// Obtain the DpId Method
Unsigned long DataPoint: GetDpId ()
{
Return this-> dpID;
}
// Set the DpId Method
Void DataPoint: SetDpId (unsigned long dpID)
{
This-> dpID = dpID;
}
// GetIsVisited Method
Bool DataPoint: isVisited ()
{
Return this-> visited;
}
// SetIsVisited Method
Void DataPoint: SetVisited (bool visited)
{
This-> visited = visited;
}
// GetClusterId Method
Long DataPoint: GetClusterId ()
{
Return this-> clusterId;
}
// GetClusterId Method
Void DataPoint: SetClusterId (long clusterId)
{
This-> clusterId = clusterId;
}
// GetArrivalPoints Method
Vector <unsigned long> & DataPoint: GetArrivalPoints ()
{
Return arrivalPoints;
}
The DBSCAN algorithm class is declared as follows:
#include <iostream>
#include <cmath>
using namespace std;
// Cluster analysis Type
Class ClusterAnalysis
{
Private:
Vector <DataPoint> dadaSets; // data set
Unsigned int dimNum; // dimension
Double radius; // radius
Unsigned int dataNum; // data quantity
Unsigned int minPTs; // minimum number of neighboring data
Double GetDistance (DataPoint & dp1, DataPoint & dp2); // Distance Function
Void SetArrivalPoints (DataPoint & dp); // set the domain point list of data points
Void KeyPointCluster (unsigned long I, unsigned long clusterId); // perform cluster operations on vertices in the data base field.
Public:
ClusterAnalysis () {}// default constructor
Bool Init (char * fileName, double radius, int minPTs); // Initialization
Bool DoDBSCANRecursive (); // DBSCAN Recursive Algorithm
Bool WriteToFile (char * fileName); // write the clustering result to the file
};
Clustering implementation. The code is as follows:
#include "ClusterAnalysis.h"
#include <fstream>
#include <iosfwd>
#include <math.h>
/*
Function: Cluster Initialization
Description: writes the data file name, radius, and minimum number of data in the field to the clustering algorithm class, reads the file, and reads the data information into the algorithm-class dataset.
Parameters:
Char * fileName; // file name
Double radius; // radius
Int minPTs; // minimum number of domain data
Return Value: true ;*/
Bool ClusterAnalysis: Init (char * fileName, double radius, int minPTs)
{
This-> radius = radius; // set the radius.
This-> minPTs = minPTs; // you can specify the minimum number of data records in a field.
This-> dimNum = DIME_NUM; // sets the data dimension.
Ifstream ifs (fileName); // open the file
If (! Ifs. is_open () // if the file has been opened, an error message is returned.
{
Cout <"Error opening file"; // output Error message
Exit (-1); // exit the program
}
Unsigned long I = 0; // number of data records
While (! Ifs. eof () // read the POI information from the file and write the information to the POI list.
{
DataPoint tempDP; // temporary data point object
Double tempDimData [DIME_NUM]; // dimension information of temporary data points
For (int j = 0; j <DIME_NUM; j ++) // read the file and read data in each dimension.
{
Ifs> tempDimData [j];
}
TempDP. SetDimension (tempDimData); // Save the dimension information to the data point object
// Char date [20] = "";
// Char time [20] = "";
/// Double type; // useless information
// Ifs> date;
// Ifs> time; // read useless information
TempDP. SetDpId (I); // set the ID of the data point object to I
TempDP. SetVisited (false); // set isVisited to false.
TempDP. SetClusterId (-1); // set the default cluster ID to-1
DadaSets. push_back (tempDP); // press the object into the data collection container
I ++; // count + 1
}
Ifs. close (); // close the file stream
DataNum = I; // set the data object set size to I
For (unsigned long I = 0; I <dataNum; I ++)
{
SetArrivalPoints (dadaSets [I]); // compute the objects in the data point Field
}
Return true; // return
}
/*
Function: writes data sets processed by clustering algorithms back to a file.
Note: Write the cluster results back to the file.
Parameters:
Char * fileName; // name of the file to be written
Return Value: true */
Bool ClusterAnalysis: WriteToFile (char * fileName)
{
Ofstream of1 (fileName); // initialize the file output stream
For (unsigned long I = 0; I <dataNum; I ++) // write a file to each processed data point
{
For (int d = 0; d <DIME_NUM; d ++) // write the dimension information to the file.
Of1 <dadaSets [I]. GetDimension () [d] <'\ T ';
Of1 <dadaSets [I]. GetClusterId () <endl; // write the cluster ID to a file
}
Of1.close (); // close the output file stream
Return true; // return
}
/*
Function: sets the domain point list of data points.
Description: sets the domain point list of data points.
Parameters:
Return Value: true ;*/
Void ClusterAnalysis: SetArrivalPoints (DataPoint & dp)
{
For (unsigned long I = 0; I <dataNum; I ++) // Execute
{
Double distance = GetDistance (dadaSets [I], dp); // obtain the distance from a specific vertex.
If (distance <= radius & I! = Dp. GetDpId () // if the distance is smaller than the radius, and the id of a specific vertex is different from the id of dp
Dp. GetArrivalPoints (). push_back (I); // list the fields under which the specified vertex id is under pressure.
}
If (dp. GetArrivalPoints (). size ()> = minPTs) // if the data volume of data points in the dp field is greater than minPTs, execute
{
Dp. SetKey (true); // set the identifier of the dp core object to true
Return; // return
}
Dp. SetKey (false); // if it is not a core object, set the identifier of the dp core object to false.
}
/*
Function: Performs clustering operations.
Note: Perform cluster operations.
Parameters:
Return Value: true ;*/
Bool ClusterAnalysis: DoDBSCANRecursive ()
{
Unsigned long clusterId = 0; // cluster id count, initialized to 0
For (unsigned long I = 0; I <dataNum; I ++) // Execute
{
DataPoint & dp = dadaSets [I]; // obtain the I data point object
If (! Dp. isVisited () & dp. IsKey () // if the object has not been accessed and is executed by the core object
{
Dp. SetClusterId (clusterId); // set the cluster ID of the object to clusterId
Dp. SetVisited (true); // sets whether the object has been accessed.
KeyPointCluster (I, clusterId); // clustering the interior point of the object field
ClusterId ++; // 1 terid auto-increment 1
}
// Cout <"isolated point \ T" <I <endl;
}
Cout <"copolymer class" <clusterId <"" <endl; // The number of clusters output after the algorithm is completed.
Return true; // return
}
/*
Function: Performs clustering operations on vertices in the data base field.
Note: recursive methods are used for deep-priority clustering of data.
Parameters:
Unsigned long dpID; // data point id
Unsigned long clusterId; // cluster id of the data point
Return Value: void ;*/
Void ClusterAnalysis: KeyPointCluster (unsigned long dpID, unsigned long clusterId)
{
DataPoint & srcDp = dadaSets [dpID]; // obtain the data point object
If (! SrcDp. IsKey () return;
Vector <unsigned long> & arrvalPoints = srcDp. GetArrivalPoints (); // obtains the list of Interior Point IDs in the object field.
For (unsigned long I = 0; I <arrvalPoints. size (); I ++)
{
DataPoint & desDp = dadaSets [arrvalPoints [I]; // obtain the point data point in the field
If (! DesDp. isVisited () // if the object has not been accessed and executed
{
// Cout <"data point \ t" <desDp. GetDpId () <"cluster ID is \ t" <clusterId <endl;
DesDp. SetClusterId (clusterId); // set the ID of the cluster to which the object belongs to as clusterId, that is, the object is sucked into the cluster
DesDp. SetVisited (true); // sets the object to be accessed.
If (desDp. IsKey () // if this object is a core object
{
KeyPointCluster (desDp. GetDpId (), clusterId); // recursively performs clustering operations on vertices in the field of point data in this field, using the depth-first Method
}
}
}
}
// Distance between two data points
/*
Function: obtains the distance between two data points.
Returns the Euclidean distance between two data points.
Parameters:
DataPoint & dp1; // data point 1
DataPoint & dp2; // data point 2
Return Value: double; // the distance between two points */
Double ClusterAnalysis: GetDistance (DataPoint & dp1, DataPoint & dp2)
{
Double distance = 0; // The initialization distance is 0.
For (int I = 0; I <DIME_NUM; I ++) // executes
{
Distance + = pow (dp1.GetDimension () [I]-dp2.GetDimension () [I], 2); // distance + square of the difference of each dimension
}
Return pow (distance, 0.5); // the start and return the distance
}
Calling the algorithm is simple. The code is as follows:
#include "ClusterAnalysis.h"
#include <cstdio>
using namespace std;
Int main ()
{
ClusterAnalysis myClusterAnalysis; // cluster algorithm object Declaration
MyClusterAnalysis. init ("D :\\ 1108 \ XY.txt", 9); // algorithm initialization, with a specified radius of 15 and a minimum number of data points in the field of 3, (The data dimension specified in the program is 2)
MyClusterAnalysis. DoDBSCANRecursive (); // executes the clustering algorithm.
MyClusterAnalysis. WriteToFile ("D: \ 1108 \ XYResult.txt"); // write the execution result to the file
System ("pause"); // display the result
Return 0; // return
}