K-means clustering algorithm: a lightweight Java design and implementation
There are many Java implementations of the K-means algorithm on the Internet. Naturally, each one reflects its author's coding style, so the code differs from one implementation to the next. Once you understand the principle, it is best to write the code according to your own design ideas.
The basic principle of K-means, as found through a Baidu search, is as follows:
package net.codeal.suanfa.kmeans;

import java.util.Set;

/**
 * Parent class for elements that can be clustered by k-means: an element
 * knows how to measure its distance to another element of the same type and
 * how to derive a new cluster center from the members of its cluster.
 *
 * <p>The implementations here are placeholders ({@code 0} and {@code null});
 * subclasses are expected to override both methods.
 *
 * @param <E> the concrete element type (the subclass itself)
 * @author fuhuaguo
 * @since 2015-09-01
 */
public class Kmeansable<E extends Kmeansable<E>> {

    /**
     * Obtains the distance between this element and another one.
     *
     * @param other the element to measure against
     * @return the distance; this base implementation always returns {@code 0}
     */
    public double getDistance(E other) {
        return 0;
    }

    /**
     * Gets the new center point for the given cluster members.
     *
     * @param eSet the elements currently assigned to this center
     * @return the new center; this base implementation always returns {@code null}
     */
    public E getNewCenter(Set<E> eSet) {
        return null;
    }
}
package net.codeal.suanfa.kmeans;

import java.util.Set;

/**
 * Clustering bean holding the dimension values of one sample. This version
 * has three dimensions ({@code k1}, {@code k2}, {@code k3}); the similarity
 * (distance) calculation is the bean's own behaviour, so it lives here.
 *
 * <p>Equality and hashing are based on the three coordinates only; the
 * {@code id} is ignored. Instances are mutable, so do not change the
 * coordinates of a point while it is stored in a hash-based collection.
 *
 * @author fuhuaguo (fhg@jusfoun.com)
 * @since 2015-09-01
 */
public class Point extends Kmeansable<Point> {

    private String id;
    // Dimension 1
    private double k1;
    // Dimension 2
    private double k2;
    // Dimension 3
    private double k3;

    public Point() {
    }

    public Point(String id, double k1, double k2, double k3) {
        this.id = id;
        this.k1 = k1;
        this.k2 = k2;
        this.k3 = k3;
    }

    /**
     * Calculates the Euclidean distance to another point: the square root of
     * the sum of the squared differences of every dimension.
     *
     * @param other the point to measure against; must not be {@code null}
     * @return the Euclidean distance between this point and {@code other}
     */
    @Override
    public double getDistance(Point other) {
        double d1 = this.k1 - other.getK1();
        double d2 = this.k2 - other.getK2();
        double d3 = this.k3 - other.getK3();
        return Math.sqrt(d1 * d1 + d2 * d2 + d3 * d3);
    }

    /**
     * Computes the new center of a cluster as the per-dimension mean of its
     * members.
     *
     * @param eSet the points assigned to this center
     * @return a new point (with a {@code null} id) at the mean position, or
     *         this point itself when {@code eSet} is {@code null} or empty
     */
    @Override
    public Point getNewCenter(Set<Point> eSet) {
        if (eSet == null || eSet.size() == 0) {
            return this;
        }
        double sum1 = 0;
        double sum2 = 0;
        double sum3 = 0;
        for (Point p : eSet) {
            sum1 += p.getK1();
            sum2 += p.getK2();
            sum3 += p.getK3();
        }
        int count = eSet.size();
        Point center = new Point();
        center.setK1(sum1 / count);
        center.setK2(sum2 / count);
        center.setK3(sum3 / count);
        return center;
    }

    /**
     * Two points are equal when all three coordinates are equal. Uses
     * {@link Double#compare(double, double)} rather than {@code ==} so that
     * the relation is reflexive for NaN and consistent with {@link #hashCode()}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Point)) {
            return false;
        }
        Point other = (Point) obj;
        return Double.compare(this.k1, other.k1) == 0
                && Double.compare(this.k2, other.k2) == 0
                && Double.compare(this.k3, other.k3) == 0;
    }

    /**
     * Combines the hashes of the individual coordinates; hashing their sum
     * would make every permutation of the same values collide.
     */
    @Override
    public int hashCode() {
        int result = Double.valueOf(k1).hashCode();
        result = 31 * result + Double.valueOf(k2).hashCode();
        result = 31 * result + Double.valueOf(k3).hashCode();
        return result;
    }

    @Override
    public String toString() {
        return "(" + k1 + ", " + k2 + ", " + k3 + ")";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public double getK1() {
        return k1;
    }

    public void setK1(double k1) {
        this.k1 = k1;
    }

    public double getK2() {
        return k2;
    }

    public void setK2(double k2) {
        this.k2 = k2;
    }

    public double getK3() {
        return k3;
    }

    public void setK3(double k3) {
        this.k3 = k3;
    }
}
package net.codeal.suanfa.kmeans;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Generic k-means clustering over any element type that can measure its
 * distance to another element and compute a new center from a cluster.
 *
 * @param <E> the element type being clustered
 */
public class KmeansAlgorithm<E extends Kmeansable<E>> {

    /**
     * Performs k-means clustering on the data set and prints every iteration
     * to standard output. Iteration stops when the centers no longer change
     * or after {@code depth} iterations, whichever comes first.
     *
     * <p>Nothing happens when the data set is {@code null}, when
     * {@code k <= 1}, or when the data set does not contain more than
     * {@code k} elements.
     *
     * @param dataSet the elements to cluster
     * @param k       the number of clusters
     * @param depth   the maximum number of iterations
     */
    public void kmeans(Set<E> dataSet, int k, int depth) {
        // The number of classes is not set properly.
        if (dataSet == null || k <= 1 || dataSet.size() <= k) {
            return;
        }

        // Initial centers: the first k elements in the set's iteration order
        // (not a true random pick).
        Set<E> kSet = new HashSet<E>();
        int count = 0;
        for (E e : dataSet) {
            if (count++ >= k) {
                break;
            }
            kSet.add(e);
        }

        boolean flag = true;
        while (flag && depth > 0) {
            Map<E, Set<E>> kMap = new HashMap<E, Set<E>>();
            for (E e : kSet) {
                kMap.put(e, new HashSet<E>());
            }

            // Assign every element to the center with the smallest distance.
            for (E data : dataSet) {
                kMap.get(nearestCenter(data, kSet)).add(data);
            }

            // Report this round. Note that depth counts DOWN, so the first
            // round is labelled with the initial depth value.
            System.out.println("This is " + depth + "th Clustering");
            for (Map.Entry<E, Set<E>> m : kMap.entrySet()) {
                System.out.println(m.getKey() + ": " + m.getValue());
            }

            // Obtain the new cluster centers and check for convergence.
            Set<E> oldSet = kSet;
            kSet = getNewCenters(kMap);
            flag = !isSameCenters(kSet, oldSet);
            depth--;
        }
    }

    /**
     * Finds the center closest to {@code data}. The first center is always a
     * candidate, so a result is returned even when every distance is NaN or
     * not smaller than {@link Double#MAX_VALUE}.
     */
    private E nearestCenter(E data, Set<E> centers) {
        E nearest = null;
        double best = 0;
        boolean found = false;
        for (E center : centers) {
            double distance = data.getDistance(center);
            if (!found || distance < best) {
                nearest = center;
                best = distance;
                found = true;
            }
        }
        return nearest;
    }

    /**
     * Gets the new center list: one new center per entry, computed by the
     * entry's current center from the elements assigned to it.
     *
     * @param kMap the current centers mapped to their assigned elements
     * @return the set of new centers (may be smaller than {@code kMap} when
     *         several clusters yield equal centers)
     */
    public Set<E> getNewCenters(Map<E, Set<E>> kMap) {
        Set<E> eSet = new HashSet<E>();
        for (Map.Entry<E, Set<E>> m : kMap.entrySet()) {
            eSet.add(m.getKey().getNewCenter(m.getValue()));
        }
        return eSet;
    }

    /**
     * Determines whether two center lists are the same, i.e. contain exactly
     * the same centers. A one-directional {@code containsAll} is not enough:
     * it would also report "same" when one set is a strict subset of the other.
     *
     * @param oldSet one set of centers
     * @param newSet the other set of centers
     * @return {@code true} when both sets hold the same centers
     */
    public boolean isSameCenters(Set<E> oldSet, Set<E> newSet) {
        return oldSet.equals(newSet);
    }

    /**
     * Demo: clusters five three-dimensional points into two groups with at
     * most ten iterations. The ids are arbitrary labels and do not take part
     * in the clustering.
     */
    public static void main(String[] args) {
        Set<Point> dataSet = new HashSet<Point>();
        dataSet.add(new Point("1", 1, 1, 1));
        dataSet.add(new Point("2", 2, 2, 2));
        dataSet.add(new Point("3", 5, 6, 1));
        dataSet.add(new Point("4", 10, 10, 10));
        dataSet.add(new Point("5", 11, 11, 11));
        new KmeansAlgorithm<Point>().kmeans(dataSet, 2, 10);
    }
}
Result:
This is 10th Clustering
(1.0, 1.0, 1.0): [(1.0, 1.0, 1.0), (2.0, 2.0, 2.0), (5.0, 6.0, 1.0)]
(10.0, 10.0, 10.0): [(10.0, 10.0, 10.0), (11.0, 11.0, 11.0)]
This is 9th Clustering
(10.5, 10.5, 10.5): [(10.0, 10.0, 10.0), (11.0, 11.0, 11.0)]
(2.6666666666666665, 3.0, 1.3333333333333333): [(1.0, 1.0, 1.0), (2.0, 2.0, 2.0), (5.0, 6.0, 1.0)]