/
d3.kmeans.js
118 lines (103 loc) · 2.98 KB
/
d3.kmeans.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
'use strict';
function kmeans(arrayToProcess, numberOfClusters, accessor) {
var groups = new Array();
var centroids = new Array();
var oldCentroids = new Array();
var changed = false;
/*
* 1. no accessor provided
* 2. accessor returns a single value (1-dimensional)
* 3. accessor returns an array (multi-dimentioanl)
*/
if (!accessor) {
accessor = function (d) { return [d]; };
}
var dist = function (a, b) {
a = (typeof a === Array) ? a : [a];
b = (typeof b === Array) ? b : [b];
if (a.length !== b.length) {
throw "Number of dimensions not aligned between data points."
}
var ds = 0;
for (var i = 0; i < a.length; i++) {
ds += Math.pow(a[i] - b[i], 2);
}
return Math.sqrt(ds);
};
var calCentroid = function (group) {
var centroid = [];
var dim = group[0].length;
for (var i = 0; i < dim; i++) {
centroid.push(
group.map(function (p) { return p[i]; })
.reduce(function (prevVal, currVal) {
return prevVal + currVal;
}, 0) / group.length);
}
return centroid;
};
var moveCentroids = function (groups) {
return groups.map(calCentroid);
};
var identicalCoordinate = function(a, b) {
for (var d = 0; d <= a.length; d++) {
if (a[d] !== b[d]) return false;
}
return true;
};
var existsIn = function(basket, p) {
for(var j = 0; j <= basket.length; j++) {
if (identicalCoordinate(basket[j], p)) {
return true;
}
}
return false;
};
var centroidsChanged = function (oldSet, currSet) {
// every old centroid should be found in new set, otherwise consider centroids changed
for (var i = 0;i < oldSet.length; i++) {
if (!existsIn(currSet, oldSet[i])) return true;
}
return false;
};
// initialise group arrays
for (var initGroups = 0; initGroups < numberOfClusters; initGroups++) {
groups[initGroups] = new Array();
}
// hop for systemical sampling
var hop = Math.round(arrayToProcess.length / (numberOfClusters + 1));
var i, j, idx;
for (i = 0; i < numberOfClusters; i++) {
idx = hop * (i + 1);
centroids[i] = accessor(arrayToProcess[idx]);
}
do {
for (j = 0; j < numberOfClusters; j++) {
groups[j] = [];
}
changed = false;
for (i = 0; i < arrayToProcess.length; i++) {
var distance = -1;
var oldDistance = -1
var newGroup;
for (j = 0; j < numberOfClusters; j++) {
distance = dist(centroids[j], accessor(arrayToProcess[i]));
if (oldDistance == -1) {
oldDistance = distance;
newGroup = j;
} else if (distance <= oldDistance) {
newGroup = j;
oldDistance = distance;
}
}
groups[newGroup].push(arrayToProcess[i]);
}
oldCentroids = centroids;
centroids = moveCentroids(groups);
changed = centroidsChanged(oldCentroids, centroids);
} while (changed == true);
return groups;
}
if (typeof d3 !== "undefined") {
d3.kmeans = kmeans;
}