// KMEANS.js
  /**
   * KMEANS clustering
   *
   * @author Lukasz Krawczyk <contact@lukaszkrawczyk.eu>
   * @license MIT
   */
  /**
   * Creates a KMEANS clusterer.
   *
   * All arguments are optional and are forwarded to init(); an omitted
   * argument leaves the matching default in place (3 clusters, an empty
   * dataset).
   *
   * @constructor
   *
   * @param {Array} dataset - feature vectors to cluster
   * @param {number} k - number of clusters
   * @param {function} distance - distance function
   * @returns {KMEANS}
   */
  function KMEANS(dataset, k, distance) {
    // Defaults, kept whenever the matching argument is omitted.
    this.k = 3;
    this.dataset = [];

    // Clustering state; init() resets both of these.
    this.assignments = []; // cluster index associated with each feature vector
    this.centroids = []; // one vector per cluster

    this.init(dataset, k, distance);
  }
  /**
   * Resets the clustering state and stores whichever settings were supplied.
   *
   * Assignments and centroids are always cleared. The dataset, k and the
   * distance function are only overwritten by arguments that are not
   * undefined, so omitted arguments keep their previous values.
   *
   * @param {Array} dataset - feature vectors to cluster
   * @param {number} k - number of clusters
   * @param {function} distance - distance function
   * @returns {undefined}
   */
  KMEANS.prototype.init = function(dataset, k, distance) {
    var wasGiven = function(value) {
      return typeof value !== 'undefined';
    };

    // every (re)initialisation starts from a clean clustering state
    this.assignments = [];
    this.centroids = [];

    if (wasGiven(dataset)) {
      this.dataset = dataset;
    }
    if (wasGiven(k)) {
      this.k = k;
    }
    if (wasGiven(distance)) {
      this.distance = distance;
    }
  };
  /**
   * Runs k-means clustering.
   *
   * Centroids are seeded with randomCentroid(). Each pass then assigns every
   * point to its closest centroid and moves each centroid to the mean of the
   * points assigned to it. A cluster left without points gets a fresh random
   * centroid, which forces another pass. The loop ends after a pass in which
   * no assignment changed and no centroid had to be re-seeded.
   *
   * @param {Array} dataset - feature vectors; when omitted the stored dataset is kept
   * @param {number} k - number of clusters; when omitted the stored value is kept
   * @returns {Array} the result of getClusters(); for an empty dataset it is
   *   returned immediately, without seeding any centroid
   */
  KMEANS.prototype.run = function(dataset, k) {
    this.init(dataset, k);

    var len = this.dataset.length;

    // Nothing to cluster: there is no point a centroid could be seeded from.
    if (len === 0) {
      return this.getClusters();
    }

    // initialize centroids
    for (var i = 0; i < this.k; i++) {
      this.centroids[i] = this.randomCentroid();
    }

    var change = true;
    while (change) {
      // assign feature vectors to clusters
      change = this.assign();

      // adjust location of centroids
      for (var centroidId = 0; centroidId < this.k; centroidId++) {
        // The mean vector grows to the dimension of the points being summed,
        // so it never depends on a dimension computed for another cluster.
        var mean = [];
        var count = 0;
        var dim;

        for (var j = 0; j < len; j++) {
          // only points currently assigned to this cluster contribute
          if (centroidId !== this.assignments[j]) {
            continue;
          }

          var point = this.dataset[j];
          for (dim = 0; dim < point.length; dim++) {
            // components are filled from index 0 upwards, so anything at or
            // past mean.length has not been started yet and counts as 0
            mean[dim] = (dim < mean.length ? mean[dim] : 0) + point[dim];
          }
          count++;
        }

        if (count > 0) {
          // if cluster contains points, adjust centroid position
          for (dim = 0; dim < mean.length; dim++) {
            mean[dim] /= count;
          }
          this.centroids[centroidId] = mean;
        } else {
          // if cluster is empty, generate new random centroid and run another pass
          this.centroids[centroidId] = this.randomCentroid();
          change = true;
        }
      }
    }

    return this.getClusters();
  };
  /**
   * Picks a random dataset point that is not already used as a centroid.
   *
   * "Already used" is decided with indexOf on this.centroids, i.e. by
   * identity of the point array, not by comparing coordinates.
   *
   * @returns {Array} a point taken from this.dataset
   * @throws {Error} when the dataset is empty or every point is already a centroid
   */
  KMEANS.prototype.randomCentroid = function() {
    var size = this.dataset.length;
    var hasUnused = false;

    // The sampling loop below can only terminate if at least one point is
    // still unused, so check that first instead of spinning forever.
    for (var i = 0; i < size; i++) {
      if (this.centroids.indexOf(this.dataset[i]) < 0) {
        hasUnused = true;
        break;
      }
    }
    if (!hasUnused) {
      throw new Error('KMEANS.randomCentroid: no unused data point is available to serve as a centroid');
    }

    var centroid;
    do {
      // floor(random * size) gives every index the same probability;
      // rounding would halve the odds of the first and last point
      centroid = this.dataset[Math.floor(Math.random() * size)];
    } while (this.centroids.indexOf(centroid) >= 0);

    return centroid;
  };
  /**
   * Assigns every point to its closest centroid.
   *
   * For each point the index of the closest centroid is computed with
   * argmin() and this.distance, then stored in this.assignments.
   *
   * @returns {boolean} true when at least one assignment was written
   */
  KMEANS.prototype.assign = function() {
    var change = false;
    var len = this.dataset.length;
    var closestCentroid;

    for (var i = 0; i < len; i++) {
      closestCentroid = this.argmin(this.dataset[i], this.centroids, this.distance);

      // Strict comparison, like the strict check run() applies to the same
      // values: a stored value that is not the identical number is replaced.
      if (closestCentroid !== this.assignments[i]) {
        this.assignments[i] = closestCentroid;
        change = true;
      }
    }

    return change;
  };
  /**
   * Groups point indices by the cluster they are assigned to.
   *
   * @returns {Array} array created with length k; the entry at a cluster
   *   index is the list of indices of the points assigned to that cluster.
   *   Entries of clusters without any point are left unset.
   */
  KMEANS.prototype.getClusters = function() {
    var groups = new Array(this.k);

    for (var index = 0; index < this.assignments.length; index++) {
      var label = this.assignments[index];

      if (typeof groups[label] === 'undefined') {
        // first point seen for this cluster: start its list
        groups[label] = [index];
      } else {
        groups[label].push(index);
      }
    }

    return groups;
  };
  // utils
  /**
   * Finds the index of the element of `set` for which f(point, element) is
   * smallest.
   *
   * Ties go to the lowest index. When no value is below Number.MAX_VALUE
   * (for instance when `set` is empty) the result is 0.
   *
   * @param {Array} point
   * @param {Array.<Array>} set
   * @param {Function} f - called as f(point, element), returns a number
   * @returns {number} index into `set`
   */
  KMEANS.prototype.argmin = function(point, set, f) {
    var bestIndex = 0;
    var bestValue = Number.MAX_VALUE;
    var total = set.length;
    var index = 0;

    while (index < total) {
      var candidate = f(point, set[index]);
      // strictly smaller only, so the earliest minimum wins
      if (candidate < bestValue) {
        bestValue = candidate;
        bestIndex = index;
      }
      index += 1;
    }

    return bestIndex;
  };
  /**
   * Euclidean distance
   *
   * Only the components both vectors have (up to the shorter length) are
   * compared.
   *
   * @param {Array.<number>} p
   * @param {Array.<number>} q
   * @returns {number}
   */
  KMEANS.prototype.distance = function(p, q) {
    var squares = 0;

    // walk the shared components from the last one down to index 0
    for (var axis = Math.min(p.length, q.length); axis--;) {
      var delta = p[axis] - q[axis];
      squares += delta * delta;
    }

    return Math.sqrt(squares);
  };
  // Hand the constructor to CommonJS consumers when a `module.exports`
  // object is available; otherwise nothing is exported here.
  if (typeof module !== 'undefined') {
    if (module.exports) {
      module.exports = KMEANS;
    }
  }