main.js 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. "use strict";
  2. /*jshint esversion: 6 */
  3. var Distance = require("./distance.js"),
  4. ClusterInit = require("./kinit.js"),
  5. eudist = Distance.eudist,
  6. mandist = Distance.mandist,
  7. dist = Distance.dist,
  8. kmrand = ClusterInit.kmrand,
  9. kmpp = ClusterInit.kmpp;
  10. var MAX = 10000;
  11. /**
  12. * Inits an array with values
  13. */
  14. function init(len, val, v) {
  15. v = v || [];
  16. for (var i = 0; i < len; i++) {
  17. v[i] = val;
  18. }return v;
  19. }
  20. function skmeans(data, k, initial, maxit) {
  21. var ks = [],
  22. old = [],
  23. idxs = [],
  24. dist = [];
  25. var conv = false,
  26. it = maxit || MAX;
  27. var len = data.length,
  28. vlen = data[0].length,
  29. multi = vlen > 0;
  30. var count = [];
  31. if (!initial) {
  32. var _idxs = {};
  33. while (ks.length < k) {
  34. var idx = Math.floor(Math.random() * len);
  35. if (!_idxs[idx]) {
  36. _idxs[idx] = true;
  37. ks.push(data[idx]);
  38. }
  39. }
  40. } else if (initial == "kmrand") {
  41. ks = kmrand(data, k);
  42. } else if (initial == "kmpp") {
  43. ks = kmpp(data, k);
  44. } else {
  45. ks = initial;
  46. }
  47. do {
  48. // Reset k count
  49. init(k, 0, count);
  50. // For each value in data, find the nearest centroid
  51. for (var i = 0; i < len; i++) {
  52. var min = Infinity,
  53. _idx = 0;
  54. for (var j = 0; j < k; j++) {
  55. // Multidimensional or unidimensional
  56. var dist = multi ? eudist(data[i], ks[j]) : Math.abs(data[i] - ks[j]);
  57. if (dist <= min) {
  58. min = dist;
  59. _idx = j;
  60. }
  61. }
  62. idxs[i] = _idx; // Index of the selected centroid for that value
  63. count[_idx]++; // Number of values for this centroid
  64. }
  65. // Recalculate centroids
  66. var sum = [],
  67. old = [],
  68. dif = 0;
  69. for (var _j = 0; _j < k; _j++) {
  70. // Multidimensional or unidimensional
  71. sum[_j] = multi ? init(vlen, 0, sum[_j]) : 0;
  72. old[_j] = ks[_j];
  73. }
  74. // If multidimensional
  75. if (multi) {
  76. for (var _j2 = 0; _j2 < k; _j2++) {
  77. ks[_j2] = [];
  78. } // Sum values and count for each centroid
  79. for (var _i = 0; _i < len; _i++) {
  80. var _idx2 = idxs[_i],
  81. // Centroid for that item
  82. vsum = sum[_idx2],
  83. // Sum values for this centroid
  84. vect = data[_i]; // Current vector
  85. // Accumulate value on the centroid for current vector
  86. for (var h = 0; h < vlen; h++) {
  87. vsum[h] += vect[h];
  88. }
  89. }
  90. // Calculate the average for each centroid
  91. conv = true;
  92. for (var _j3 = 0; _j3 < k; _j3++) {
  93. var ksj = ks[_j3],
  94. // Current centroid
  95. sumj = sum[_j3],
  96. // Accumulated centroid values
  97. oldj = old[_j3],
  98. // Old centroid value
  99. cj = count[_j3]; // Number of elements for this centroid
  100. // New average
  101. for (var _h = 0; _h < vlen; _h++) {
  102. ksj[_h] = sumj[_h] / cj || 0; // New centroid
  103. }
  104. // Find if centroids have moved
  105. if (conv) {
  106. for (var _h2 = 0; _h2 < vlen; _h2++) {
  107. if (oldj[_h2] != ksj[_h2]) {
  108. conv = false;
  109. break;
  110. }
  111. }
  112. }
  113. }
  114. }
  115. // If unidimensional
  116. else {
  117. // Sum values and count for each centroid
  118. for (var _i2 = 0; _i2 < len; _i2++) {
  119. var _idx3 = idxs[_i2];
  120. sum[_idx3] += data[_i2];
  121. }
  122. // Calculate the average for each centroid
  123. for (var _j4 = 0; _j4 < k; _j4++) {
  124. ks[_j4] = sum[_j4] / count[_j4] || 0; // New centroid
  125. }
  126. // Find if centroids have moved
  127. conv = true;
  128. for (var _j5 = 0; _j5 < k; _j5++) {
  129. if (old[_j5] != ks[_j5]) {
  130. conv = false;
  131. break;
  132. }
  133. }
  134. }
  135. conv = conv || --it <= 0;
  136. } while (!conv);
  137. return {
  138. it: MAX - it,
  139. k: k,
  140. idxs: idxs,
  141. centroids: ks
  142. };
  143. }
  144. module.exports = skmeans;
  145. //# sourceMappingURL=main.js.map