123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236 |
- /**
- * DBSCAN - Density based clustering
- *
- * @author Lukasz Krawczyk <contact@lukaszkrawczyk.eu>
- * @copyright MIT
- */
- /**
- * DBSCAN class construcotr
- * @constructor
- *
- * @param {Array} dataset
- * @param {number} epsilon
- * @param {number} minPts
- * @param {function} distanceFunction
- * @returns {DBSCAN}
- */
- function DBSCAN(dataset, epsilon, minPts, distanceFunction) {
- /** @type {Array} */
- this.dataset = [];
- /** @type {number} */
- this.epsilon = 1;
- /** @type {number} */
- this.minPts = 2;
- /** @type {function} */
- this.distance = this._euclideanDistance;
- /** @type {Array} */
- this.clusters = [];
- /** @type {Array} */
- this.noise = [];
- // temporary variables used during computation
- /** @type {Array} */
- this._visited = [];
- /** @type {Array} */
- this._assigned = [];
- /** @type {number} */
- this._datasetLength = 0;
- this._init(dataset, epsilon, minPts, distanceFunction);
- };
- /******************************************************************************/
- // public functions
- /**
- * Start clustering
- *
- * @param {Array} dataset
- * @param {number} epsilon
- * @param {number} minPts
- * @param {function} distanceFunction
- * @returns {undefined}
- * @access public
- */
- DBSCAN.prototype.run = function(dataset, epsilon, minPts, distanceFunction) {
- this._init(dataset, epsilon, minPts, distanceFunction);
- for (var pointId = 0; pointId < this._datasetLength; pointId++) {
- // if point is not visited, check if it forms a cluster
- if (this._visited[pointId] !== 1) {
- this._visited[pointId] = 1;
- // if closest neighborhood is too small to form a cluster, mark as noise
- var neighbors = this._regionQuery(pointId);
- if (neighbors.length < this.minPts) {
- this.noise.push(pointId);
- } else {
- // create new cluster and add point
- var clusterId = this.clusters.length;
- this.clusters.push([]);
- this._addToCluster(pointId, clusterId);
- this._expandCluster(clusterId, neighbors);
- }
- }
- }
- return this.clusters;
- };
- /******************************************************************************/
- // protected functions
- /**
- * Set object properties
- *
- * @param {Array} dataset
- * @param {number} epsilon
- * @param {number} minPts
- * @param {function} distance
- * @returns {undefined}
- * @access protected
- */
- DBSCAN.prototype._init = function(dataset, epsilon, minPts, distance) {
- if (dataset) {
- if (!(dataset instanceof Array)) {
- throw Error('Dataset must be of type array, ' +
- typeof dataset + ' given');
- }
- this.dataset = dataset;
- this.clusters = [];
- this.noise = [];
- this._datasetLength = dataset.length;
- this._visited = new Array(this._datasetLength);
- this._assigned = new Array(this._datasetLength);
- }
- if (epsilon) {
- this.epsilon = epsilon;
- }
- if (minPts) {
- this.minPts = minPts;
- }
- if (distance) {
- this.distance = distance;
- }
- };
- /**
- * Expand cluster to closest points of given neighborhood
- *
- * @param {number} clusterId
- * @param {Array} neighbors
- * @returns {undefined}
- * @access protected
- */
- DBSCAN.prototype._expandCluster = function(clusterId, neighbors) {
- /**
- * It's very important to calculate length of neighbors array each time,
- * as the number of elements changes over time
- */
- for (var i = 0; i < neighbors.length; i++) {
- var pointId2 = neighbors[i];
- if (this._visited[pointId2] !== 1) {
- this._visited[pointId2] = 1;
- var neighbors2 = this._regionQuery(pointId2);
- if (neighbors2.length >= this.minPts) {
- neighbors = this._mergeArrays(neighbors, neighbors2);
- }
- }
- // add to cluster
- if (this._assigned[pointId2] !== 1) {
- this._addToCluster(pointId2, clusterId);
- }
- }
- };
- /**
- * Add new point to cluster
- *
- * @param {number} pointId
- * @param {number} clusterId
- */
- DBSCAN.prototype._addToCluster = function(pointId, clusterId) {
- this.clusters[clusterId].push(pointId);
- this._assigned[pointId] = 1;
- };
- /**
- * Find all neighbors around given point
- *
- * @param {number} pointId,
- * @param {number} epsilon
- * @returns {Array}
- * @access protected
- */
- DBSCAN.prototype._regionQuery = function(pointId) {
- var neighbors = [];
- for (var id = 0; id < this._datasetLength; id++) {
- var dist = this.distance(this.dataset[pointId], this.dataset[id]);
- if (dist < this.epsilon) {
- neighbors.push(id);
- }
- }
- return neighbors;
- };
- /******************************************************************************/
- // helpers
- /**
- * @param {Array} a
- * @param {Array} b
- * @returns {Array}
- * @access protected
- */
- DBSCAN.prototype._mergeArrays = function(a, b) {
- var len = b.length;
- for (var i = 0; i < len; i++) {
- var P = b[i];
- if (a.indexOf(P) < 0) {
- a.push(P);
- }
- }
- return a;
- };
- /**
- * Calculate euclidean distance in multidimensional space
- *
- * @param {Array} p
- * @param {Array} q
- * @returns {number}
- * @access protected
- */
- DBSCAN.prototype._euclideanDistance = function(p, q) {
- var sum = 0;
- var i = Math.min(p.length, q.length);
- while (i--) {
- sum += (p[i] - q[i]) * (p[i] - q[i]);
- }
- return Math.sqrt(sum);
- };
- if (typeof module !== 'undefined' && module.exports) {
- module.exports = DBSCAN;
- }
|