skmeans.js 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. "use strict";
  2. (function e(t, n, r) {
  3. function s(o, u) {
  4. if (!n[o]) {
  5. if (!t[o]) {
  6. var a = typeof require == "function" && require;if (!u && a) return a(o, !0);if (i) return i(o, !0);var f = new Error("Cannot find module '" + o + "'");throw f.code = "MODULE_NOT_FOUND", f;
  7. }var l = n[o] = { exports: {} };t[o][0].call(l.exports, function (e) {
  8. var n = t[o][1][e];return s(n ? n : e);
  9. }, l, l.exports, e, t, n, r);
  10. }return n[o].exports;
  11. }var i = typeof require == "function" && require;for (var o = 0; o < r.length; o++) {
  12. s(r[o]);
  13. }return s;
  14. })({ 1: [function (require, module, exports) {
  15. "use strict";
  16. (function () {
  17. var root = this;
  18. var previous_skmeans = root.skmeans;
  19. var skmeans = require('./main.js');
  20. if (typeof exports !== 'undefined') {
  21. if (typeof module !== 'undefined' && module.exports) {
  22. exports = module.exports = skmeans;
  23. }
  24. exports.skmeans = skmeans;
  25. }
  26. if (typeof window !== 'undefined') {
  27. window.skmeans = skmeans;
  28. }
  29. }).call(this);
  30. }, { "./main.js": 4 }], 2: [function (require, module, exports) {
  31. module.exports = {
  32. /**
  33. * Euclidean distance
  34. */
  35. eudist: function eudist(v1, v2, sqrt) {
  36. var len = v1.length;
  37. var sum = 0;
  38. for (var i = 0; i < len; i++) {
  39. var d = (v1[i] || 0) - (v2[i] || 0);
  40. sum += d * d;
  41. }
  42. // Square root not really needed
  43. return sqrt ? Math.sqrt(sum) : sum;
  44. },
  45. mandist: function mandist(v1, v2, sqrt) {
  46. var len = v1.length;
  47. var sum = 0;
  48. for (var i = 0; i < len; i++) {
  49. sum += Math.abs((v1[i] || 0) - (v2[i] || 0));
  50. }
  51. // Square root not really needed
  52. return sqrt ? Math.sqrt(sum) : sum;
  53. },
  54. /**
  55. * Unidimensional distance
  56. */
  57. dist: function dist(v1, v2, sqrt) {
  58. var d = Math.abs(v1 - v2);
  59. return sqrt ? d : d * d;
  60. }
  61. };
  62. }, {}], 3: [function (require, module, exports) {
  63. var Distance = require("./distance.js"),
  64. eudist = Distance.eudist,
  65. dist = Distance.dist;
  66. module.exports = {
  67. kmrand: function kmrand(data, k) {
  68. var map = {},
  69. ks = [],
  70. t = k << 2;
  71. var len = data.length;
  72. var multi = data[0].length > 0;
  73. while (ks.length < k && t-- > 0) {
  74. var d = data[Math.floor(Math.random() * len)];
  75. var key = multi ? d.join("_") : "" + d;
  76. if (!map[key]) {
  77. map[key] = true;
  78. ks.push(d);
  79. }
  80. }
  81. if (ks.length < k) throw new Error("Error initializating clusters");else return ks;
  82. },
  83. /**
  84. * K-means++ initial centroid selection
  85. */
  86. kmpp: function kmpp(data, k) {
  87. var distance = data[0].length ? eudist : dist;
  88. var ks = [],
  89. len = data.length;
  90. var multi = data[0].length > 0;
  91. var map = {};
  92. // First random centroid
  93. var c = data[Math.floor(Math.random() * len)];
  94. var key = multi ? c.join("_") : "" + c;
  95. ks.push(c);
  96. map[key] = true;
  97. // Retrieve next centroids
  98. while (ks.length < k) {
  99. // Min Distances between current centroids and data points
  100. var dists = [],
  101. lk = ks.length;
  102. var dsum = 0,
  103. prs = [];
  104. for (var i = 0; i < len; i++) {
  105. var min = Infinity;
  106. for (var j = 0; j < lk; j++) {
  107. var _dist = distance(data[i], ks[j]);
  108. if (_dist <= min) min = _dist;
  109. }
  110. dists[i] = min;
  111. }
  112. // Sum all min distances
  113. for (var _i = 0; _i < len; _i++) {
  114. dsum += dists[_i];
  115. }
  116. // Probabilities and cummulative prob (cumsum)
  117. for (var _i2 = 0; _i2 < len; _i2++) {
  118. prs[_i2] = { i: _i2, v: data[_i2], pr: dists[_i2] / dsum, cs: 0 };
  119. }
  120. // Sort Probabilities
  121. prs.sort(function (a, b) {
  122. return a.pr - b.pr;
  123. });
  124. // Cummulative Probabilities
  125. prs[0].cs = prs[0].pr;
  126. for (var _i3 = 1; _i3 < len; _i3++) {
  127. prs[_i3].cs = prs[_i3 - 1].cs + prs[_i3].pr;
  128. }
  129. // Randomize
  130. var rnd = Math.random();
  131. // Gets only the items whose cumsum >= rnd
  132. var idx = 0;
  133. while (idx < len - 1 && prs[idx++].cs < rnd) {}
  134. ks.push(prs[idx - 1].v);
  135. /*
  136. let done = false;
  137. while(!done) {
  138. // this is our new centroid
  139. c = prs[idx-1].v
  140. key = multi? c.join("_") : `${c}`;
  141. if(!map[key]) {
  142. map[key] = true;
  143. ks.push(c);
  144. done = true;
  145. }
  146. else {
  147. idx++;
  148. }
  149. }
  150. */
  151. }
  152. return ks;
  153. }
  154. };
  155. }, { "./distance.js": 2 }], 4: [function (require, module, exports) {
  156. /*jshint esversion: 6 */
  157. var Distance = require("./distance.js"),
  158. ClusterInit = require("./kinit.js"),
  159. eudist = Distance.eudist,
  160. mandist = Distance.mandist,
  161. dist = Distance.dist,
  162. kmrand = ClusterInit.kmrand,
  163. kmpp = ClusterInit.kmpp;
  164. var MAX = 10000;
  165. /**
  166. * Inits an array with values
  167. */
  168. function init(len, val, v) {
  169. v = v || [];
  170. for (var i = 0; i < len; i++) {
  171. v[i] = val;
  172. }return v;
  173. }
  174. function skmeans(data, k, initial, maxit) {
  175. var ks = [],
  176. old = [],
  177. idxs = [],
  178. dist = [];
  179. var conv = false,
  180. it = maxit || MAX;
  181. var len = data.length,
  182. vlen = data[0].length,
  183. multi = vlen > 0;
  184. var count = [];
  185. if (!initial) {
  186. var _idxs = {};
  187. while (ks.length < k) {
  188. var idx = Math.floor(Math.random() * len);
  189. if (!_idxs[idx]) {
  190. _idxs[idx] = true;
  191. ks.push(data[idx]);
  192. }
  193. }
  194. } else if (initial == "kmrand") {
  195. ks = kmrand(data, k);
  196. } else if (initial == "kmpp") {
  197. ks = kmpp(data, k);
  198. } else {
  199. ks = initial;
  200. }
  201. do {
  202. // Reset k count
  203. init(k, 0, count);
  204. // For each value in data, find the nearest centroid
  205. for (var i = 0; i < len; i++) {
  206. var min = Infinity,
  207. _idx = 0;
  208. for (var j = 0; j < k; j++) {
  209. // Multidimensional or unidimensional
  210. var dist = multi ? eudist(data[i], ks[j]) : Math.abs(data[i] - ks[j]);
  211. if (dist <= min) {
  212. min = dist;
  213. _idx = j;
  214. }
  215. }
  216. idxs[i] = _idx; // Index of the selected centroid for that value
  217. count[_idx]++; // Number of values for this centroid
  218. }
  219. // Recalculate centroids
  220. var sum = [],
  221. old = [],
  222. dif = 0;
  223. for (var _j = 0; _j < k; _j++) {
  224. // Multidimensional or unidimensional
  225. sum[_j] = multi ? init(vlen, 0, sum[_j]) : 0;
  226. old[_j] = ks[_j];
  227. }
  228. // If multidimensional
  229. if (multi) {
  230. for (var _j2 = 0; _j2 < k; _j2++) {
  231. ks[_j2] = [];
  232. } // Sum values and count for each centroid
  233. for (var _i4 = 0; _i4 < len; _i4++) {
  234. var _idx2 = idxs[_i4],
  235. // Centroid for that item
  236. vsum = sum[_idx2],
  237. // Sum values for this centroid
  238. vect = data[_i4]; // Current vector
  239. // Accumulate value on the centroid for current vector
  240. for (var h = 0; h < vlen; h++) {
  241. vsum[h] += vect[h];
  242. }
  243. }
  244. // Calculate the average for each centroid
  245. conv = true;
  246. for (var _j3 = 0; _j3 < k; _j3++) {
  247. var ksj = ks[_j3],
  248. // Current centroid
  249. sumj = sum[_j3],
  250. // Accumulated centroid values
  251. oldj = old[_j3],
  252. // Old centroid value
  253. cj = count[_j3]; // Number of elements for this centroid
  254. // New average
  255. for (var _h = 0; _h < vlen; _h++) {
  256. ksj[_h] = sumj[_h] / cj || 0; // New centroid
  257. }
  258. // Find if centroids have moved
  259. if (conv) {
  260. for (var _h2 = 0; _h2 < vlen; _h2++) {
  261. if (oldj[_h2] != ksj[_h2]) {
  262. conv = false;
  263. break;
  264. }
  265. }
  266. }
  267. }
  268. }
  269. // If unidimensional
  270. else {
  271. // Sum values and count for each centroid
  272. for (var _i5 = 0; _i5 < len; _i5++) {
  273. var _idx3 = idxs[_i5];
  274. sum[_idx3] += data[_i5];
  275. }
  276. // Calculate the average for each centroid
  277. for (var _j4 = 0; _j4 < k; _j4++) {
  278. ks[_j4] = sum[_j4] / count[_j4] || 0; // New centroid
  279. }
  280. // Find if centroids have moved
  281. conv = true;
  282. for (var _j5 = 0; _j5 < k; _j5++) {
  283. if (old[_j5] != ks[_j5]) {
  284. conv = false;
  285. break;
  286. }
  287. }
  288. }
  289. conv = conv || --it <= 0;
  290. } while (!conv);
  291. return {
  292. it: MAX - it,
  293. k: k,
  294. idxs: idxs,
  295. centroids: ks
  296. };
  297. }
  298. module.exports = skmeans;
  299. }, { "./distance.js": 2, "./kinit.js": 3 }] }, {}, [1]);
  300. //# sourceMappingURL=skmeans.js.map