Data Mining Algorithms: A C++ Implementation of the DBSCAN Algorithm

Source: Internet
Author: User

Data Mining Algorithms: A C++ Implementation of the DBSCAN Algorithm

(Final exams are coming up soon, so this write-up is rough. Thank you for your understanding.)

I. Concept

DBSCAN is a density-based clustering algorithm. The number of clusters is determined automatically by the algorithm rather than being supplied in advance. Points in low-density regions are treated as noise and ignored, so DBSCAN does not produce a complete clustering: not every point ends up in a cluster.

II. Pseudocode

1. Label every point as a core point, a boundary point, or a noise point.

2. Discard the noise points.

3. Add an edge between every pair of core points that lie within Eps of each other.

4. Each group of connected core points forms a separate cluster.

5. Assign each boundary point to the cluster of one of its associated core points.

III. Important Data Structures

1. Define the neighborhood radius, the density threshold, and the number of points in the dataset.

#define Eps 3 // neighborhood radius

#define MinPts 3 // neighborhood density threshold

#define N 20 // the dataset contains N objects

2. Define an array that holds all the data points.

Double point [N] [2]; // save all data points

3. Define vectors that store the positions of the core points, boundary points, and noise points.

std::vector<int> kernel_point; // positions of the core points in point[][]

std::vector<int> border_point; // positions of the boundary points in point[][]

std::vector<int> noise_point; // positions of the noise points in point[][]

4. Define the vector that stores the final clusters.

std::vector<std::vector<int> > cluster; // the final clusters; each cluster lists positions in point[][]

IV. Source Code

#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

using namespace std;

#define Eps 3    // neighborhood radius
#define MinPts 3 // neighborhood density threshold
#define N 20     // the dataset contains N objects

double point[N][2];            // all data points; point[i][0] and point[i][1] are the two coordinates of point i
std::vector<int> kernel_point; // positions of the core points in point[][]
std::vector<int> border_point; // positions of the boundary points in point[][]
std::vector<int> noise_point;  // positions of the noise points in point[][]

// Intermediate rows of core points that lie within Eps of each other;
// main() merges them into the final clusters. Each row holds positions in point[][].
std::vector<std::vector<int> > mid;

// The final clusters. Each cluster lists the positions of its members in point[][].
std::vector<std::vector<int> > cluster;

// Initialize N coordinate points
Void init (int n ){
Srand (unsigned) time (NULL ));
For (int I = 0; I <n; I ++ ){
For (int j = 0; j <2; j ++ ){
Point [I] [j] = rand () % (N + 1 );
}
}
}

Int main (int argc, char ** argv ){

// Initialize the dataset
Int n = N;
Init (n );

// Mark all points as core points, boundary points, or noise points
// Mark the core point
For (int I = 0; I <N; I ++ ){
Int num = 0; // determines whether the value exceeds MinPts. If num> = MinPts after a loop, the core point is added.
For (int j = 0; j <N; j ++ ){
If (pow (point [I] [0]-point [j] [0], 2) + pow (point [I] [1]-point [j] [1], 2) <= pow (Eps, 2) {// itself is also
Num ++;
}
}
If (num> = MinPts ){
Kernel_point.push_back (I );
}
}

// Mark as a boundary point or a noise point
For (int I = 0; I <N; I ++ ){
// The boundary point or noise point cannot be the core point
Int flag = 0; // If flag = 0, the vertex is not the core vertex. If flag = 1, the vertex is the core vertex.
For (int j = 0; j <kernel_point.size (); j ++ ){
If (I = kernel_point [j]) {
Flag = 1;
Break;
}
}
If (flag = 0 ){
// Determine whether it is a boundary point or a noise point
Int flag2 = 0; // If flag = 0, the point is the boundary point. If flag = 1, the point is the noise point.
For (int j = 0; j <kernel_point.size (); j ++ ){
Int s = kernel_point [j]; // mark the position of the j-th core point in point [] [] for convenient calling
If (pow (point [I] [0]-point [s] [0], 2) + pow (point [I] [1]-point [s] [1], 2) <pow (Eps, 2 )){
Flag2 = 0;
Border_point.push_back (I );
Break;
}
Else {
Flag2 = 1;
Continue;
}
}
If (flag2 = 1 ){
// Add noise points
Noise_point.push_back (I );
Continue;
}
}
Else {
Continue;
}
}

// Place the core point within the Eps in a vector
For (int I = 0; I <kernel_point.size (); I ++ ){
Int x = kernel_point [I];
Vector <int> record; // create a record for each vertex and put it in mid.
Record. push_back (x );
For (int j = I + 1; j <kernel_point.size (); j ++ ){
Int y = kernel_point [j];
If (pow (point [x] [0]-point [y] [0], 2) -pow (point [x] [1]-point [y] [1], 2) <pow (Eps, 2 )){
Record. push_back (y );
}
}
Mid. push_back (record );
}

// Merge the vector
For (int I = 0; I <mid. size (); I ++) {// for each row in the mid
// Determine whether the row has been added to a previous row
If (mid [I] [0] =-1 ){
Continue;
}
// If it has not been determined
For (int j = 0; j <mid [I]. size (); j ++) {// judge each of these values
// Determine whether other rows exist for each value
For (int x = I + 1; x <mid. size (); x ++) {// for each subsequent row
If (mid [x] [0] =-1 ){
Continue;
}
For (int y = 0; y <mid [x]. size (); y ++ ){
If (mid [I] [j] = mid [x] [y]) {
// If the same element exists, put it in a vector, add precluster after the loop, and set all element values in the vector to-1.
For (int a = 0; a <mid [x]. size (); a ++ ){
Mid [I]. push_back (mid [x] [a]);
Mid [x] [a] =-1;
}
Break;
}
}
}
}

Cluster. push_back (mid [I]);

}

// Delete duplicate elements in the cluster
For (int I = 0; I <cluster. size (); I ++) {// for each row
For (int j = 0; j <cluster [I]. size (); j ++ ){
For (int n = j + 1; n <cluster [I]. size (); n ++ ){
If (cluster [I] [j] = cluster [I] [n]) {
Cluster [I]. erase (cluster [I]. begin () + n );
N --;
}
}
}
}

// At This point, each cluster is saved, and each cluster has a location corresponding to point [] []
// Assign each boundary point to a cluster associated with the core point
For (int I = 0; I <border_point.size (); I ++) {// for each boundary point
Int x = border_point [I];
For (int j = 0; j <cluster. size (); j ++) {// check each cluster, determine the core node associated with the edge node, and add the edge node to the cluster where the first core node appears.
Int flag = 0; // flag = 0 indicates that no matching item exists. flag = 1 indicates that a matching item exists and the loop is exited.
For (int k = 0; k <cluster [j]. size (); k ++ ){
Int y = cluster [j] [k];
If (pow (point [x] [0]-point [y] [0], 2) + pow (point [x] [1]-point [y] [1], 2) <pow (Eps, 2 )){
Cluster [j]. push_back (x );
Flag = 1;
Break;
}
}
If (flag = 1 ){
Break;
}
}
}


/*************************************** **************************************** ************/
Cout <"All Points:" <endl;
For (int I = 0; I <N; I ++ ){
Cout <"no." <I <"no." <"\ t ";
For (int j = 0; j <2; j ++ ){
Cout <point [I] [j] <"\ t ";
}
Cout <endl;
}
Cout <endl;

Cout <"Kernel Points:" <endl;
For (int I = 0; I <kernel_point.size (); I ++ ){
Cout <kernel_point [I] <"\ t ";
}
Cout <endl;

Cout <"Border Points:" <endl;
For (int I = 0; I <border_point.size (); I ++ ){
Cout <border_point [I] <"\ t ";
}
Cout <endl;

Cout <"Noise Points:" <endl;
For (int I = 0; I <noise_point.size (); I ++ ){
Cout <noise_point [I] <"\ t ";
}
Cout <endl;

Cout <"Cluster:" <endl;
For (int I = 0; I <cluster. size (); I ++ ){
Cout <"no." <I <"no." <"\ t ";
For (int j = 0; j <cluster [I]. size (); j ++ ){
Cout <cluster [I] [j] <"\ t ";
}
Cout <endl;
}

Return 0;
}

V. Running Results

Figure 1: Running result of the DBSCAN algorithm

Figure 2: The running result of the DBSCAN algorithm shown as a plot

(Pink points are noise points, and blue and yellow are two clusters)

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.