123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- /**
- * KMEANS clustering
- *
- * @author Lukasz Krawczyk <contact@lukaszkrawczyk.eu>
- * @copyright MIT
- */
- /**
- * KMEANS class constructor
- * @constructor
- *
- * @param {Array} dataset
- * @param {number} k - number of clusters
- * @param {function} distance - distance function
- * @returns {KMEANS}
- */
- function KMEANS(dataset, k, distance) {
- this.k = 3; // number of clusters
- this.dataset = []; // set of feature vectors
- this.assignments = []; // set of associated clusters for each feature vector
- this.centroids = []; // vectors for our clusters
- this.init(dataset, k, distance);
- }
- /**
- * @returns {undefined}
- */
- KMEANS.prototype.init = function(dataset, k, distance) {
- this.assignments = [];
- this.centroids = [];
- if (typeof dataset !== 'undefined') {
- this.dataset = dataset;
- }
- if (typeof k !== 'undefined') {
- this.k = k;
- }
- if (typeof distance !== 'undefined') {
- this.distance = distance;
- }
- };
- /**
- * @returns {undefined}
- */
- KMEANS.prototype.run = function(dataset, k) {
- this.init(dataset, k);
- var len = this.dataset.length;
- // initialize centroids
- for (var i = 0; i < this.k; i++) {
- this.centroids[i] = this.randomCentroid();
- }
- var change = true;
- while(change) {
- // assign feature vectors to clusters
- change = this.assign();
- // adjust location of centroids
- for (var centroidId = 0; centroidId < this.k; centroidId++) {
- var mean = new Array(maxDim);
- var count = 0;
- // init mean vector
- for (var dim = 0; dim < maxDim; dim++) {
- mean[dim] = 0;
- }
- for (var j = 0; j < len; j++) {
- var maxDim = this.dataset[j].length;
- // if current cluster id is assigned to point
- if (centroidId === this.assignments[j]) {
- for (var dim = 0; dim < maxDim; dim++) {
- mean[dim] += this.dataset[j][dim];
- }
- count++;
- }
- }
- if (count > 0) {
- // if cluster contain points, adjust centroid position
- for (var dim = 0; dim < maxDim; dim++) {
- mean[dim] /= count;
- }
- this.centroids[centroidId] = mean;
- } else {
- // if cluster is empty, generate new random centroid
- this.centroids[centroidId] = this.randomCentroid();
- change = true;
- }
- }
- }
- return this.getClusters();
- };
- /**
- * Generate random centroid
- *
- * @returns {Array}
- */
- KMEANS.prototype.randomCentroid = function() {
- var maxId = this.dataset.length -1;
- var centroid;
- var id;
- do {
- id = Math.round(Math.random() * maxId);
- centroid = this.dataset[id];
- } while (this.centroids.indexOf(centroid) >= 0);
- return centroid;
- }
- /**
- * Assign points to clusters
- *
- * @returns {boolean}
- */
- KMEANS.prototype.assign = function() {
- var change = false;
- var len = this.dataset.length;
- var closestCentroid;
- for (var i = 0; i < len; i++) {
- closestCentroid = this.argmin(this.dataset[i], this.centroids, this.distance);
- if (closestCentroid != this.assignments[i]) {
- this.assignments[i] = closestCentroid;
- change = true;
- }
- }
- return change;
- }
- /**
- * Extract information about clusters
- *
- * @returns {undefined}
- */
- KMEANS.prototype.getClusters = function() {
- var clusters = new Array(this.k);
- var centroidId;
- for (var pointId = 0; pointId < this.assignments.length; pointId++) {
- centroidId = this.assignments[pointId];
- // init empty cluster
- if (typeof clusters[centroidId] === 'undefined') {
- clusters[centroidId] = [];
- }
- clusters[centroidId].push(pointId);
- }
- return clusters;
- };
- // utils
- /**
- * @params {Array} point
- * @params {Array.<Array>} set
- * @params {Function} f
- * @returns {number}
- */
- KMEANS.prototype.argmin = function(point, set, f) {
- var min = Number.MAX_VALUE;
- var arg = 0;
- var len = set.length;
- var d;
- for (var i = 0; i < len; i++) {
- d = f(point, set[i]);
- if (d < min) {
- min = d;
- arg = i;
- }
- }
- return arg;
- };
- /**
- * Euclidean distance
- *
- * @params {number} p
- * @params {number} q
- * @returns {number}
- */
- KMEANS.prototype.distance = function(p, q) {
- var sum = 0;
- var i = Math.min(p.length, q.length);
- while (i--) {
- var diff = p[i] - q[i];
- sum += diff * diff;
- }
- return Math.sqrt(sum);
- };
- if (typeof module !== 'undefined' && module.exports) {
- module.exports = KMEANS;
- }
|