Clustering algorithms are an important branch of machine learning and are generally trained in an unsupervised manner. Common clustering algorithms include K-means, K-medoids, GMM, spectral clustering, and Ncut; this article implements the K-means algorithm.
K-means algorithm:
1. Divide the data into k non-empty subsets.
2. Calculate the center of each class (in K-means the center, i.e. the centroid, is the mean of all points in the class) and mark it as the seed point.
3. Cluster each object to the nearest seed point
4. Go back to step 2; stop when the clustering result no longer changes.
Complexity:
O(k·n·d·T)
- Computing the distance between two points: O(d)
- Assigning every point to a class: O(k·n), where k is the number of classes and n is the number of points
- Maximum number of iterations: T
Kmeans.h
#ifndef KMEANS_H_
#define KMEANS_H_

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept only because existing code that includes this header may rely
// on it; nothing below depends on it.
using namespace std;

/**
 * One-dimensional k-means clustering over scalar samples.
 *
 * The object stores a copy of the samples and of the initial centres.
 * kmeans() then repeatedly assigns each sample to its nearest centre and
 * moves each centre to the mean of the samples assigned to it.
 *
 * Requirements on Type (taken from the operations used below): default
 * constructible, copyable, assignable from 0, supports `a - b`, `a += b`,
 * `a / b`, conversion of a difference to float, and construction from a
 * size_t count. NOTE(review): unsigned types are not suitable because
 * `sample - centre` would wrap around; a floating-point Type is presumably
 * the intended use -- confirm with callers.
 */
template <typename Type>
class KMeans {
public:
    /**
     * Creates a clusterer with zero-initialised buffers that the caller can
     * fill through getDatas() / getCenter().
     *
     * @param nd        number of samples
     * @param nk        number of clusters (a negative value is treated as 0)
     * @param precision iteration stops once the total distance changes by no
     *                  more than this amount between two passes
     */
    KMeans(const size_t nd = 0, const int nk = 0, const float precision = 0.0001f)
        : m_datas(nd),
          m_center(clusterCount(nk)),
          m_ndataNumbers(nd),
          m_nkNumbers(nk > 0 ? nk : 0),
          m_precision(precision),
          m_iterations(0) {}

    /**
     * Creates a clusterer from caller-provided samples and initial centres.
     * Both arrays are copied; the caller keeps ownership of its own arrays.
     * Parameters, in order: samples (nd elements), initial centres
     * (nk elements), nd, nk, precision.
     */
    KMeans(Type[], Type[], const size_t, const int, const float);

    // No user-defined destructor or copy constructor: the vectors release
    // and deep-copy their storage themselves, so copying a KMeans object no
    // longer ends in a double delete[].

    Type* getDatas() const;      // writable pointer to the samples (NULL when there are none)
    Type* getCenter() const;     // writable pointer to the centres (NULL when there are none)
    int iterationTimes() const;  // passes performed so far (accumulates across kmeans() calls)
    void kmeans();               // run the clustering until it converges
    void printCenter();          // write every centre to std::cout

private:
    float dataDivide(Type*, size_t*);    // assignment step
    void changeCenters(Type*, size_t*);  // update step

    // Number of centre slots to allocate for a requested cluster count.
    static size_t clusterCount(int nk) {
        return nk > 0 ? static_cast<size_t>(nk) : 0;
    }

    // First element of a buffer, or NULL when the buffer is empty.
    static Type* bufferOf(std::vector<Type>& buffer) {
        return buffer.empty() ? NULL : &buffer[0];
    }

private:
    // mutable: the const getters above have always handed out writable
    // pointers, and that interface is preserved.
    mutable std::vector<Type> m_datas;
    mutable std::vector<Type> m_center;
    const size_t m_ndataNumbers;  // number of samples
    const int m_nkNumbers;        // number of centres (never negative)
    const float m_precision;      // convergence threshold
    int m_iterations;             // passes performed
};

// Copies nd samples and nk initial centres from the caller's arrays.
template <typename Type>
KMeans<Type>::KMeans(Type datas[], Type center[], const size_t nd, const int nk,
                     const float precision)
    : m_datas(datas, datas + nd),
      m_center(center, center + clusterCount(nk)),
      m_ndataNumbers(nd),
      m_nkNumbers(nk > 0 ? nk : 0),
      m_precision(precision),
      m_iterations(0) {}

// Returns the sample buffer.
template <typename Type>
Type* KMeans<Type>::getDatas() const {
    return bufferOf(m_datas);
}

// Returns the centre buffer.
template <typename Type>
Type* KMeans<Type>::getCenter() const {
    return bufferOf(m_center);
}

// Returns how many passes kmeans() has executed on this object.
template <typename Type>
int KMeans<Type>::iterationTimes() const {
    return m_iterations;
}

/**
 * Runs assignment/update passes until the total sample-to-centre distance
 * changes by no more than the configured precision between two consecutive
 * passes. Does nothing when there are no samples or no centres.
 */
template <typename Type>
void KMeans<Type>::kmeans() {
    if (m_nkNumbers <= 0 || m_ndataNumbers == 0) {
        return;  // nothing to cluster; also keeps m_center[0] from being read
    }

    const size_t k = static_cast<size_t>(m_nkNumbers);
    std::vector<size_t> numbers(k);  // samples assigned to each centre
    std::vector<Type> sumvalues(k);  // sum of the samples assigned to each centre

    // A non-positive precision would never be satisfied by a strict
    // comparison, so it is treated as "stop when the distance is unchanged".
    const float tolerance = m_precision > 0 ? m_precision : 0.0f;

    float previous = 0.0f;
    float current = 0.0f;
    bool firstPass = true;
    for (;;) {
        for (size_t c = 0; c < k; ++c) {
            numbers[c] = 0;
            sumvalues[c] = 0;
        }
        previous = current;
        current = dataDivide(&sumvalues[0], &numbers[0]);
        changeCenters(&sumvalues[0], &numbers[0]);
        m_iterations++;

        // The first pass has nothing to compare against. Afterwards compare
        // the magnitude of the change: the distance normally decreases, so a
        // signed difference would end the loop after a single comparison.
        if (!firstPass && !(std::fabs(current - previous) > tolerance)) {
            break;
        }
        firstPass = false;
    }
}

/**
 * Assignment step: attributes every sample to its nearest centre.
 *
 * @param sumvalues per-centre running sum of assigned samples (in/out)
 * @param numbers   per-centre count of assigned samples (in/out)
 * @return the sum over all samples of the distance to the chosen centre
 *
 * Precondition: at least one centre exists (kmeans() guarantees this).
 */
template <typename Type>
float KMeans<Type>::dataDivide(Type* sumvalues, size_t* numbers) {
    float dist = 0.0f;
    for (size_t i = 0; i < m_ndataNumbers; i++) {
        // Distance in one dimension is the absolute difference.
        float nearest = std::fabs(static_cast<float>(m_datas[i] - m_center[0]));
        int pos = 0;
        for (int j = 1; j < m_nkNumbers; j++) {
            const float candidate =
                std::fabs(static_cast<float>(m_datas[i] - m_center[j]));
            if (candidate < nearest) {
                nearest = candidate;
                pos = j;
            }
        }
        // Account for the sample exactly once, after the winner is known.
        dist += nearest;
        sumvalues[pos] += m_datas[i];
        numbers[pos]++;
    }
    return dist;
}

/**
 * Update step: moves each centre to the mean of the samples assigned to it.
 * A centre that received no samples keeps its current value.
 */
template <typename Type>
void KMeans<Type>::changeCenters(Type* sumvalues, size_t* numbers) {
    for (int i = 0; i < m_nkNumbers; i++) {
        if (numbers[i] == 0) continue;
        // Convert the count to Type first so a signed integral Type is not
        // promoted to unsigned arithmetic by the division.
        m_center[i] = sumvalues[i] / static_cast<Type>(numbers[i]);
    }
}

// Prints one "center <index>: <value>" line per centre, then a blank line.
template <typename Type>
void KMeans<Type>::printCenter() {
    for (int i = 0; i < m_nkNumbers; i++) {
        std::cout << "center " << i << ": " << m_center[i] << std::endl;
    }
    std::cout << std::endl;
}

#endif  // KMEANS_H_
K-means clustering algorithm