main.js 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. /*jshint esversion: 6 */
  2. const
  3. Distance = require("./distance.js"),
  4. ClusterInit = require("./kinit.js"),
  5. eudist = Distance.eudist,
  6. mandist = Distance.mandist,
  7. dist = Distance.dist,
  8. kmrand = ClusterInit.kmrand,
  9. kmpp = ClusterInit.kmpp;
  10. const MAX = 10000;
  11. /**
  12. * Inits an array with values
  13. */
  14. function init(len,val,v) {
  15. v = v || [];
  16. for(let i=0;i<len;i++) v[i] = val;
  17. return v;
  18. }
  19. function skmeans(data,k,initial,maxit) {
  20. var ks = [], old = [], idxs = [], dist = [];
  21. var conv = false, it = maxit || MAX;
  22. var len = data.length, vlen = data[0].length, multi = vlen>0;
  23. var count = [];
  24. if(!initial) {
  25. let idxs = {};
  26. while(ks.length<k) {
  27. let idx = Math.floor(Math.random()*len);
  28. if(!idxs[idx]) {
  29. idxs[idx] = true;
  30. ks.push(data[idx]);
  31. }
  32. }
  33. }
  34. else if(initial=="kmrand") {
  35. ks = kmrand(data,k);
  36. }
  37. else if(initial=="kmpp") {
  38. ks = kmpp(data,k);
  39. }
  40. else {
  41. ks = initial;
  42. }
  43. do {
  44. // Reset k count
  45. init(k,0,count);
  46. // For each value in data, find the nearest centroid
  47. for(let i=0;i<len;i++) {
  48. let min = Infinity, idx = 0;
  49. for(let j=0;j<k;j++) {
  50. // Multidimensional or unidimensional
  51. var dist = multi? eudist(data[i],ks[j]) : Math.abs(data[i]-ks[j]);
  52. if(dist<=min) {
  53. min = dist;
  54. idx = j;
  55. }
  56. }
  57. idxs[i] = idx; // Index of the selected centroid for that value
  58. count[idx]++; // Number of values for this centroid
  59. }
  60. // Recalculate centroids
  61. var sum = [], old = [], dif = 0;
  62. for(let j=0;j<k;j++) {
  63. // Multidimensional or unidimensional
  64. sum[j] = multi? init(vlen,0,sum[j]) : 0;
  65. old[j] = ks[j];
  66. }
  67. // If multidimensional
  68. if(multi) {
  69. for(let j=0;j<k;j++) ks[j] = [];
  70. // Sum values and count for each centroid
  71. for(let i=0;i<len;i++) {
  72. let idx = idxs[i], // Centroid for that item
  73. vsum = sum[idx], // Sum values for this centroid
  74. vect = data[i]; // Current vector
  75. // Accumulate value on the centroid for current vector
  76. for(let h=0;h<vlen;h++) {
  77. vsum[h] += vect[h];
  78. }
  79. }
  80. // Calculate the average for each centroid
  81. conv = true;
  82. for(let j=0;j<k;j++) {
  83. let ksj = ks[j], // Current centroid
  84. sumj = sum[j], // Accumulated centroid values
  85. oldj = old[j], // Old centroid value
  86. cj = count[j]; // Number of elements for this centroid
  87. // New average
  88. for(let h=0;h<vlen;h++) {
  89. ksj[h] = (sumj[h])/(cj) || 0; // New centroid
  90. }
  91. // Find if centroids have moved
  92. if(conv) {
  93. for(let h=0;h<vlen;h++) {
  94. if(oldj[h]!=ksj[h]) {
  95. conv = false;
  96. break;
  97. }
  98. }
  99. }
  100. }
  101. }
  102. // If unidimensional
  103. else {
  104. // Sum values and count for each centroid
  105. for(let i=0;i<len;i++) {
  106. let idx = idxs[i];
  107. sum[idx] += data[i];
  108. }
  109. // Calculate the average for each centroid
  110. for(let j=0;j<k;j++) {
  111. ks[j] = sum[j]/count[j] || 0; // New centroid
  112. }
  113. // Find if centroids have moved
  114. conv = true;
  115. for(let j=0;j<k;j++) {
  116. if(old[j]!=ks[j]) {
  117. conv = false;
  118. break;
  119. }
  120. }
  121. }
  122. conv = conv || (--it<=0);
  123. }while(!conv);
  124. return {
  125. it : MAX-it,
  126. k : k,
  127. idxs : idxs,
  128. centroids : ks
  129. };
  130. }
  131. module.exports = skmeans;