function [cid,nr,centers] = cskmeans(x,k,nc)
% CSKMEANS K-means clustering - general method.
%
% This implements the more general k-means algorithm, where
% hmeans is used to find the initial partition and then each
% observation is examined for further improvements in minimizing
% the within-group sum of squares.
%
% [CID,NR,CENTERS] = CSKMEANS(X,K,NC) performs k-means
% clustering using the data given in X.
%
% INPUTS:  X is the n x d matrix of data,
% where each row indicates an observation. K indicates
% the number of desired clusters. NC is a k x d matrix for the
% initial cluster centers. If NC is not specified, then the
% centers will be randomly chosen from the observations.
%
% OUTPUTS: CID provides a set of n indexes indicating cluster
% membership for each point. NR is the number of observations
% in each cluster. CENTERS is a matrix, where each row
% corresponds to a cluster center.
%
% See also CSHMEANS

% W. L. and A. R. Martinez, 9/15/01
% Computational Statistics Toolbox
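
% Example (an illustrative sketch, not part of the original help text;
% the data below are made up): cluster 100 random 2-D points into
% k = 3 groups and plot the points with the fitted centers.
%
%    x = randn(100,2);                    % 100 observations in 2-D
%    [cid,nr,centers] = cskmeans(x,3);    % centers seeded from the data
%    plot(x(:,1),x(:,2),'.',centers(:,1),centers(:,2),'rx')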
warning off
[n,d] = size(x);
if nargin < 3
    % Then pick some observations to be the cluster centers.
    ind = ceil(n*rand(1,k));
    % We will add some noise to make it interesting.
    nc = x(ind,:) + randn(k,d);
end
% Set up storage.
% Integers 1,...,k indicating cluster membership.
cid = zeros(1,n);
% Make this different to get the loop started.
oldcid = ones(1,n);
% The number in each cluster.
nr = zeros(1,k);
% Set up maximum number of iterations.
maxiter = 100;
iter = 1;
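
% (Clarifying note, not in the original comments: each hmeans pass below
% reduces the within-group sum of squares
%    SS = sum_j sum_{i in cluster j} ||x(i,:) - nc(j,:)||^2
% by first assigning every point to its nearest center and then
% recomputing each center as the mean of its assigned points.)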
while ~isequal(cid,oldcid) & iter < maxiter
    % Save the current memberships so convergence can be detected.
    oldcid = cid;
    % Implement the hmeans algorithm.
    % For each point, find the distance to all cluster centers.
    for i = 1:n
        dist = sum((repmat(x(i,:),k,1) - nc).^2, 2);
        [m,ind] = min(dist);   % Assign it to this cluster center.
        cid(i) = ind;
    end
    % Find the new cluster centers.
    for i = 1:k
        % Find all points in this cluster.
        ind = find(cid == i);
        % Find the centroid.
        nc(i,:) = mean(x(ind,:));
        % Find the number in each cluster.
        nr(i) = length(ind);
    end
    iter = iter + 1;
end
% Now check each observation to see if the error can be minimized some more.
% (With maxiter = 2, this refinement pass over the data runs at most once.)
maxiter = 2;
iter = 1;
move = 1;
while iter < maxiter & move ~= 0
    move = 0;
    % Loop through all points.
    for i = 1:n
        % Find the distance to all cluster centers.
        dist = sum((repmat(x(i,:),k,1) - nc).^2, 2);
        r = cid(i);   % This is the current cluster ID for x.
        % Adjusted distances: adding a point to a cluster of size nr
        % increases the within-group sum of squares by nr/(nr+1) times
        % its squared distance to that cluster's center.
        dadj = nr./(nr+1).*dist';   % All adjusted distances.
        [m,ind] = min(dadj);   % The minimum should be the cluster it belongs to.
        if ind ~= r   % If not, then move x.
            cid(i) = ind;
            ic = find(cid == ind);
            nc(ind,:) = mean(x(ic,:));
            move = 1;
        end
    end
    iter = iter + 1;
end
centers = nc;
if move == 0
    disp('No points were moved after the initial clustering procedure.')
else
    disp('Some points were moved after the initial clustering procedure.')
end
warning on
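
% Usage note (illustrative, not from the original file): to control the
% starting configuration, pass your own k x d matrix of initial centers
% as the third argument, e.g.
%
%    x = randn(100,2);
%    nc0 = x(1:3,:);                       % seed with the first 3 points
%    [cid,nr,centers] = cskmeans(x,3,nc0);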