/**
 * K-means clustering using the Euclidean distance.
 *
 * Author: aris_w (e-mail: aris_zzy@126.com)
 * Date: 2006-6-24
 * Cluster numbers are zero-based: 0, 1, 2, 3, 4, ...
 */
import java.io.*;
import java.lang.Math;
import java.text.DecimalFormat;
import java.util.Arrays;
/**
 * Small self-contained k-means demo that clusters the rows of a matrix using
 * the Euclidean distance.
 *
 * Cluster ids in the result are zero-based indices into the list of centers.
 */
public class kmeanseuclideandistance {

    /** Formatter used for every printed double; the pattern (trailing space included) is kept as-is. */
    private static final DecimalFormat FORMAT = new DecimalFormat("00.00 ");

    /** Hard upper bound on the number of assign/update passes performed by {@link #kMeans}. */
    private static final int MAX_ITERATIONS = 200;

    /**
     * Computes the Euclidean distance between two vectors.
     *
     * @param array1 first vector
     * @param array2 second vector
     * @return square root of the sum of squared component differences
     * @throws IllegalArgumentException if the vectors differ in length (previously this
     *         only printed a message and returned 0.0, i.e. "identical")
     */
    private static double euDistance(double[] array1, double[] array2) {
        if (array1.length != array2.length) {
            throw new IllegalArgumentException(
                    "vectors must have the same length: " + array1.length + " vs " + array2.length);
        }
        double sum = 0.0;
        for (int i = 0; i < array1.length; i++) {
            double diff = array1[i] - array2[i];
            sum += diff * diff;
        }
        return Math.sqrt(sum);
    }

    /**
     * Prints a double array on one line as {@code [a,b,...]} using {@link #FORMAT}.
     *
     * @param array values to print
     */
    private static void printArray(double[] array) {
        System.out.print('[');
        for (int i = 0; i < array.length; i++) {
            System.out.print(FORMAT.format(array[i]));
            if (i + 1 < array.length) {
                System.out.print(",");
            }
        }
        System.out.println(']');
    }

    /**
     * Prints an int array on one line as {@code [a,b,...]}.
     *
     * @param array values to print
     */
    private static void printArray(int[] array) {
        System.out.print('[');
        for (int i = 0; i < array.length; i++) {
            System.out.print(array[i]);
            if (i + 1 < array.length) {
                System.out.print(",");
            }
        }
        System.out.println(']');
    }

    /**
     * Prints the top-left {@code row} x {@code col} part of a matrix, one matrix row per
     * output line, wrapped in a pair of braces.
     *
     * @param matrix values to print
     * @param row    number of rows to print
     * @param col    number of columns to print
     */
    private static void printMatrix(double[][] matrix, int row, int col) {
        System.out.println("matrix is :");
        System.out.println('{');
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                System.out.print(FORMAT.format(matrix[i][j]));
                if (j + 1 < col) {
                    System.out.print(",");
                }
            }
            System.out.println();
        }
        System.out.println('}');
    }

    /**
     * Returns {@code m} distinct indices drawn at random from {@code 0 .. n-1}.
     *
     * Uses a partial Fisher-Yates shuffle: only the first {@code m} positions are
     * shuffled, which is all the caller needs.
     *
     * @param n size of the index range
     * @param m how many indices to return
     * @return array of {@code m} distinct indices
     * @throws IllegalArgumentException if {@code n} or {@code m} is negative or {@code m > n}
     */
    private static int[] randPerm(int n, int m) {
        if (n < 0 || m < 0 || m > n) {
            throw new IllegalArgumentException("need 0 <= m <= n, got n=" + n + ", m=" + m);
        }
        int[] perm = new int[n];
        for (int i = 0; i < n; i++) {
            perm[i] = i;
        }
        for (int i = 0; i < m; i++) {
            // Math.random() is in [0, 1), so pick always lands in [i, n - 1].
            int pick = i + (int) (Math.random() * (n - i));
            int tmp = perm[i];
            perm[i] = perm[pick];
            perm[pick] = tmp;
        }
        return Arrays.copyOf(perm, m);
    }

    /**
     * Tells whether two int arrays have the same length and the same elements.
     *
     * @param array1 first array
     * @param array2 second array
     * @return {@code true} when both arrays are element-wise equal
     */
    private static boolean isEqual(int[] array1, int[] array2) {
        return Arrays.equals(array1, array2);
    }

    /**
     * Finds the index of the smallest element; the first one wins on ties.
     *
     * @param array values to scan, must not be empty
     * @return index of the minimum
     * @throws IllegalArgumentException if {@code array} is empty
     */
    private static int minLocation(double[] array) {
        if (array.length == 0) {
            throw new IllegalArgumentException("cannot take the minimum of an empty array");
        }
        int location = 0;
        double min = array[0];
        for (int i = 1; i < array.length; i++) {
            if (array[i] < min) {
                location = i;
                min = array[i];
            }
        }
        return location;
    }

    /**
     * Clusters the first {@code row} rows of {@code matrix} into {@code clusternum} groups.
     *
     * Centers start as {@code clusternum} distinct, randomly chosen rows. Each pass assigns
     * every row to its nearest center and then moves each center to the mean of its rows.
     * The loop stops as soon as a pass leaves the assignment unchanged, or after
     * {@link #MAX_ITERATIONS} passes, whichever comes first.
     *
     * @param matrix     data, one point per row; each used row must have exactly {@code col} entries
     * @param row        number of points
     * @param col        number of coordinates per point
     * @param clusternum number of clusters, {@code 1 <= clusternum <= row}
     * @return for every point the zero-based id of the cluster it belongs to
     * @throws IllegalArgumentException if {@code clusternum} is out of range or a row's
     *         length differs from {@code col}
     */
    private static int[] kMeans(double[][] matrix, int row, int col, int clusternum) {
        if (clusternum < 1 || clusternum > row) {
            throw new IllegalArgumentException(
                    "need 1 <= clusternum <= row, got clusternum=" + clusternum + ", row=" + row);
        }

        int[] cid = new int[row];
        int[] oldCid = new int[row];
        double[][] clusterCenter = new double[clusternum][col];
        double[] centerDist = new double[clusternum];

        // Seed the centers with distinct random data points.
        int[] centerId = randPerm(row, clusternum);
        for (int j = 0; j < clusternum; j++) {
            System.arraycopy(matrix[centerId[j]], 0, clusterCenter[j], 0, col);
        }

        // -1 is never a valid cluster id, so the first convergence check always fails.
        Arrays.fill(oldCid, -1);

        int iter = 0;
        // Both conditions must hold to keep going: not yet converged AND budget left.
        while (iter < MAX_ITERATIONS && !isEqual(cid, oldCid)) {
            System.arraycopy(cid, 0, oldCid, 0, row);

            // Assignment step: nearest center for every point.
            for (int i = 0; i < row; i++) {
                for (int j = 0; j < clusternum; j++) {
                    centerDist[j] = euDistance(matrix[i], clusterCenter[j]);
                }
                cid[i] = minLocation(centerDist);
            }

            // Update step: accumulate member counts and coordinate sums in one pass.
            int[] numOfEveryCluster = new int[clusternum];
            double[][] sums = new double[clusternum][col];
            for (int i = 0; i < row; i++) {
                int c = cid[i];
                numOfEveryCluster[c]++;
                for (int k = 0; k < col; k++) {
                    sums[c][k] += matrix[i][k];
                }
            }
            for (int j = 0; j < clusternum; j++) {
                if (numOfEveryCluster[j] == 0) {
                    // Empty cluster: keep its previous center. Dividing by zero would turn
                    // the center into NaN, and NaN distances make minLocation return 0
                    // for every point.
                    continue;
                }
                for (int k = 0; k < col; k++) {
                    clusterCenter[j][k] = sums[j][k] / numOfEveryCluster[j];
                }
            }

            iter++;
        }
        return cid;
    }

    /**
     * Demo entry point: builds a random 10 x 5 matrix with values in [0, 10), prints it
     * together with its pairwise distance matrix, clusters the rows into 3 groups and
     * prints the resulting cluster id of every row.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        int matrixCol = 5;
        int matrixRow = 10;
        int clusterNum = 3;

        double[][] matrix = new double[matrixRow][matrixCol];
        for (int i = 0; i < matrixRow; i++) {
            for (int j = 0; j < matrixCol; j++) {
                matrix[i][j] = 10 * Math.random();
            }
        }

        // Pairwise distances, printed only as a visual sanity check of euDistance.
        double[][] distMatrix = new double[matrixRow][matrixRow];
        for (int i = 0; i < matrixRow; i++) {
            for (int j = 0; j < matrixRow; j++) {
                distMatrix[i][j] = euDistance(matrix[i], matrix[j]);
            }
        }

        printMatrix(matrix, matrixRow, matrixCol);
        System.out.println("The distmatrix is :");
        printMatrix(distMatrix, matrixRow, matrixRow);

        int[] list = kMeans(matrix, matrixRow, matrixCol, clusterNum);
        System.out.println("the result of clustering, value of No. I means the ith belong to the No. Value cluster ");
        printArray(list);
    }
}
// Note from the original author: the program has a bug. If you understand the idea behind
// k-means, you will naturally be able to fix it; using the code directly, without changes,
// is at your own risk.
// See also the MATLAB version (previous article).