Data Mining Algorithms: A C++ Implementation of the DBSCAN Algorithm

Source: Internet
Author: User
Tags pow

(Final exams are coming up soon, so this write-up is rough; thank you for your understanding.)

First, Concept

DBSCAN is a density-based clustering algorithm; the number of clusters is determined automatically by the algorithm. Points in low-density regions are treated as noise and ignored, so DBSCAN does not produce a complete clustering (not every point is assigned to a cluster).

Second, Pseudo Code

1 Label every point as a core point, a boundary point, or a noise point.

2 Remove the noise points.

3 Add an edge between every pair of core points that lie within EPS of each other.

4 Each group of connected core points forms a cluster.

5 Assign each boundary point to the cluster of one of the core points associated with it.

Third, Important Data Structures

1 Defining neighborhood radius values, density thresholds, data set points

#define EPS 3//EPS as neighborhood radius value

#define MINPTS 3//Neighborhood density threshold

#define N 20//DataSet contains N objects

2 Define array Save all points

double point[N][2]; // stores all data points

3 Define vectors that hold the positions of the core points, boundary points, and noise points

vector<int> kernel_point; // positions of the core points in point[][]

vector<int> border_point; // positions of the boundary points in point[][]

vector<int> noise_point; // positions of the noise points in point[][]

4 Defining vectors to preserve the resulting clusters

vector<vector<int> > cluster; // stores the resulting clusters; each cluster holds the positions of its points in point[][]

Fourth, Source Code

#include <iostream>
#include <cstdlib>
#include <ctime>
#include <vector>
#include <cmath>

using namespace std;

#define EPS 3     // neighborhood radius
#define MINPTS 3  // neighborhood density threshold (a point counts itself)
#define N 20      // number of objects in the data set

double point[N][2];            // all data points: point[i][0] is x, point[i][1] is y
vector<int> kernel_point;      // indices into point[][] of the core points
vector<int> border_point;      // indices into point[][] of the boundary points
vector<int> noise_point;       // indices into point[][] of the noise points
vector<vector<int> > mid;      // intermediate per-core-point groups; may overlap before merging
vector<vector<int> > cluster;  // final clusters; each cluster holds indices into point[][]

Initialize n coordinate points
void init (int n) {
Srand ((unsigned) time (NULL));
for (int i=0; i<n; i++) {
for (int j=0; j<2; J + +) {
POINT[I][J] = rand ()% (n+1);
}
}
}

int main (int argc, char** argv) {

Initializing datasets
int n = n;
Init (n);

Mark all points as core points, boundary points, or noise points
Mark Core points
for (int i=0; i<n; i++) {
int num = 0;//Determine if the minpts is exceeded, and if num>=minpts after a loop, add the core point
for (int j=0; j<n; J + +) {
if (POW (point[i][0]-point[j][0], 2) +pow (Point[i][1]-point[j][1], 2) <=pow (Eps, 2)) {//itself is also counted as a
num++;
}
}
if (num>=minpts) {
Kernel_point.push_back (i);
}
}

Mark as boundary point or noise point
for (int i=0; i<n; i++) {
A boundary point or noise point cannot be a core point
int flag = 0;//If flag=0, then the point is not the core point, if flag=1, then the point is the core point
for (int j=0; j<kernel_point.size (); j + +) {
if (i = = Kernel_point[j]) {
flag = 1;
Break
}
}
if (flag = = 0) {
To determine whether it is a boundary point or a noise point
int flag2 = 0;//if flag=0, then the point is the boundary point, if flag=1, then the point noise Point
for (int j=0; j<kernel_point.size (); j + +) {
int s = kernel_point[j];//marks the position of the J Core point in point[][] for easy invocation
if (POW (point[i][0]-point[s][0], 2) +pow (Point[i][1]-point[s][1], 2) <pow (Eps, 2)) {
Flag2 = 0;
Border_point.push_back (i);
Break
}
else {
Flag2 = 1;
Continue
}
}
if (Flag2 = = 1) {
adding noise points
Noise_point.push_back (i);
Continue
}
}
else {
Continue
}
}

Place the distance within the EPS core point in a vector
for (int i=0; i<kernel_point.size (); i++) {
int x = Kernel_point[i];
Vector<int> record;//Create a record for each point and put it in mid
Record.push_back (x);
for (int j=i+1; j<kernel_point.size (); j + +) {
int y = kernel_point[j];
if (POW (point[x][0]-point[y][0], 2)-pow (Point[x][1]-point[y][1], 2) <pow (Eps, 2)) {
Record.push_back (y);
}
}
Mid.push_back (record);
}

Merging vectors
for (int i=0; i<mid.size (); i++) {//For each row in mid
Determines whether the row has been added to a previous row
if (mid[i][0] = =-1) {
Continue
}
If you haven't been judged.
for (int j=0; j<mid[i].size (); j + +) {//Determine each of these values
Determine if the other rows exist for each value
for (int x=i+1; x<mid.size (); x + +) {//For each subsequent line
if (mid[x][0] = =-1) {
Continue
}
for (int y=0; y<mid[x].size (); y++) {
if (mid[i][j] = = Mid[x][y]) {
If you have the same element, you should put it in a vector and add precluster after the loop, and set the value of all the elements in the vector to 1.
for (int a=0; a<mid[x].size (); a++) {
Mid[i].push_back (Mid[x][a]);
Mid[x][a] =-1;
}
Break
}
}
}
}

Cluster.push_back (Mid[i]);

}

Delete a repeating element in a cluster
for (int i=0; i<cluster.size (); i++) {//For each row
for (int j=0; j<cluster[i].size (); j + +) {
for (int n=j+1; n<cluster[i].size (); n++) {
if (cluster[i][j] = = Cluster[i][n]) {
Cluster[i].erase (Cluster[i].begin () +n);
n--;
}
}
}
}

At this point, each cluster is saved in cluster, and a bit in each cluster corresponds to the position in point[][]
Assigns each boundary point to a cluster of core points associated with it
for (int i=0; i<border_point.size (); i++) {//For each of the boundary points
int x = Border_point[i];
for (int j=0; j<cluster.size (); j + +) {//check each cluster to determine if the boundary point is associated with the core point in the cluster and add the boundary point to the cluster that appears at the first core point
int flag = 0;//Flag=0 indicates that there are no matching items, flag=1 indicates that they have been matched, exits the loop
for (int k=0; k<cluster[j].size (); k++) {
int y = cluster[j][k];
if (POW (point[x][0]-point[y][0], 2) +pow (Point[x][1]-point[y][1], 2) <pow (Eps, 2)) {
Cluster[j].push_back (x);
flag = 1;
Break
}
}
if (flag = = 1) {
Break
}
}
}


/*******************************************************************************************/
cout<< "All Points:" <<endl;
for (int i=0; i<n; i++) {
cout<< "First" <<i<< "<<" "T";
for (int j=0; j<2; J + +) {
cout<<point[i][j]<< "\ t";
}
cout<<endl;
}
cout<<endl;

cout<< "Kernel Points:" <<endl;
for (int i=0; i<kernel_point.size (); i++) {
cout<<kernel_point[i]<< "\ t";
}
cout<<endl<<endl;

cout<< "Border Points:" <<endl;
for (int i=0; i<border_point.size (); i++) {
cout<<border_point[i]<< "\ t";
}
cout<<endl<<endl;

cout<< "Noise Points:" <<endl;
for (int i=0; i<noise_point.size (); i++) {
cout<<noise_point[i]<< "\ t";
}
cout<<endl<<endl;

cout<< "Cluster:" <<endl;
for (int i=0; i<cluster.size (); i++) {
cout<< "First" <<i<< "<<" "T";
for (int j=0; j<cluster[i].size (); j + +) {
cout<<cluster[i][j]<< "\ t";
}
cout<<endl;
}

return 0;
}

Fifth, Results

Figure 1 Running results of the Dbscan algorithm

Figure 2 The results of the DBSCAN algorithm plotted as a graph

(where pink dots are noise points, blue and yellow are two clusters)

Data Mining Algorithms: A C++ Implementation of the DBSCAN Algorithm

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email; we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.