DBSCAN.js 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. /**
  2. * DBSCAN - Density based clustering
  3. *
  4. * @author Lukasz Krawczyk <contact@lukaszkrawczyk.eu>
  5. * @copyright MIT
  6. */
  7. /**
  8. * DBSCAN class construcotr
  9. * @constructor
  10. *
  11. * @param {Array} dataset
  12. * @param {number} epsilon
  13. * @param {number} minPts
  14. * @param {function} distanceFunction
  15. * @returns {DBSCAN}
  16. */
  17. function DBSCAN(dataset, epsilon, minPts, distanceFunction) {
  18. /** @type {Array} */
  19. this.dataset = [];
  20. /** @type {number} */
  21. this.epsilon = 1;
  22. /** @type {number} */
  23. this.minPts = 2;
  24. /** @type {function} */
  25. this.distance = this._euclideanDistance;
  26. /** @type {Array} */
  27. this.clusters = [];
  28. /** @type {Array} */
  29. this.noise = [];
  30. // temporary variables used during computation
  31. /** @type {Array} */
  32. this._visited = [];
  33. /** @type {Array} */
  34. this._assigned = [];
  35. /** @type {number} */
  36. this._datasetLength = 0;
  37. this._init(dataset, epsilon, minPts, distanceFunction);
  38. };
  39. /******************************************************************************/
  40. // public functions
  41. /**
  42. * Start clustering
  43. *
  44. * @param {Array} dataset
  45. * @param {number} epsilon
  46. * @param {number} minPts
  47. * @param {function} distanceFunction
  48. * @returns {undefined}
  49. * @access public
  50. */
  51. DBSCAN.prototype.run = function(dataset, epsilon, minPts, distanceFunction) {
  52. this._init(dataset, epsilon, minPts, distanceFunction);
  53. for (var pointId = 0; pointId < this._datasetLength; pointId++) {
  54. // if point is not visited, check if it forms a cluster
  55. if (this._visited[pointId] !== 1) {
  56. this._visited[pointId] = 1;
  57. // if closest neighborhood is too small to form a cluster, mark as noise
  58. var neighbors = this._regionQuery(pointId);
  59. if (neighbors.length < this.minPts) {
  60. this.noise.push(pointId);
  61. } else {
  62. // create new cluster and add point
  63. var clusterId = this.clusters.length;
  64. this.clusters.push([]);
  65. this._addToCluster(pointId, clusterId);
  66. this._expandCluster(clusterId, neighbors);
  67. }
  68. }
  69. }
  70. return this.clusters;
  71. };
  72. /******************************************************************************/
  73. // protected functions
  74. /**
  75. * Set object properties
  76. *
  77. * @param {Array} dataset
  78. * @param {number} epsilon
  79. * @param {number} minPts
  80. * @param {function} distance
  81. * @returns {undefined}
  82. * @access protected
  83. */
  84. DBSCAN.prototype._init = function(dataset, epsilon, minPts, distance) {
  85. if (dataset) {
  86. if (!(dataset instanceof Array)) {
  87. throw Error('Dataset must be of type array, ' +
  88. typeof dataset + ' given');
  89. }
  90. this.dataset = dataset;
  91. this.clusters = [];
  92. this.noise = [];
  93. this._datasetLength = dataset.length;
  94. this._visited = new Array(this._datasetLength);
  95. this._assigned = new Array(this._datasetLength);
  96. }
  97. if (epsilon) {
  98. this.epsilon = epsilon;
  99. }
  100. if (minPts) {
  101. this.minPts = minPts;
  102. }
  103. if (distance) {
  104. this.distance = distance;
  105. }
  106. };
  107. /**
  108. * Expand cluster to closest points of given neighborhood
  109. *
  110. * @param {number} clusterId
  111. * @param {Array} neighbors
  112. * @returns {undefined}
  113. * @access protected
  114. */
  115. DBSCAN.prototype._expandCluster = function(clusterId, neighbors) {
  116. /**
  117. * It's very important to calculate length of neighbors array each time,
  118. * as the number of elements changes over time
  119. */
  120. for (var i = 0; i < neighbors.length; i++) {
  121. var pointId2 = neighbors[i];
  122. if (this._visited[pointId2] !== 1) {
  123. this._visited[pointId2] = 1;
  124. var neighbors2 = this._regionQuery(pointId2);
  125. if (neighbors2.length >= this.minPts) {
  126. neighbors = this._mergeArrays(neighbors, neighbors2);
  127. }
  128. }
  129. // add to cluster
  130. if (this._assigned[pointId2] !== 1) {
  131. this._addToCluster(pointId2, clusterId);
  132. }
  133. }
  134. };
  135. /**
  136. * Add new point to cluster
  137. *
  138. * @param {number} pointId
  139. * @param {number} clusterId
  140. */
  141. DBSCAN.prototype._addToCluster = function(pointId, clusterId) {
  142. this.clusters[clusterId].push(pointId);
  143. this._assigned[pointId] = 1;
  144. };
  145. /**
  146. * Find all neighbors around given point
  147. *
  148. * @param {number} pointId,
  149. * @param {number} epsilon
  150. * @returns {Array}
  151. * @access protected
  152. */
  153. DBSCAN.prototype._regionQuery = function(pointId) {
  154. var neighbors = [];
  155. for (var id = 0; id < this._datasetLength; id++) {
  156. var dist = this.distance(this.dataset[pointId], this.dataset[id]);
  157. if (dist < this.epsilon) {
  158. neighbors.push(id);
  159. }
  160. }
  161. return neighbors;
  162. };
  163. /******************************************************************************/
  164. // helpers
  165. /**
  166. * @param {Array} a
  167. * @param {Array} b
  168. * @returns {Array}
  169. * @access protected
  170. */
  171. DBSCAN.prototype._mergeArrays = function(a, b) {
  172. var len = b.length;
  173. for (var i = 0; i < len; i++) {
  174. var P = b[i];
  175. if (a.indexOf(P) < 0) {
  176. a.push(P);
  177. }
  178. }
  179. return a;
  180. };
  181. /**
  182. * Calculate euclidean distance in multidimensional space
  183. *
  184. * @param {Array} p
  185. * @param {Array} q
  186. * @returns {number}
  187. * @access protected
  188. */
  189. DBSCAN.prototype._euclideanDistance = function(p, q) {
  190. var sum = 0;
  191. var i = Math.min(p.length, q.length);
  192. while (i--) {
  193. sum += (p[i] - q[i]) * (p[i] - q[i]);
  194. }
  195. return Math.sqrt(sum);
  196. };
  197. if (typeof module !== 'undefined' && module.exports) {
  198. module.exports = DBSCAN;
  199. }