diff --git a/README.md b/README.md index 5aed3c4..c599df3 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ ##Additional Features - The algorithm also works on data points given by ***GPS coordinates***. Moreover, the algorithm can be used on data with a time dimension also. This allows for ***spatio-temporal*** clustering. + The algorithm also works on data points given by ***GPS coordinates***. Moreover, the algorithm can be used on data with a time dimension also. This allows for ***spatio-temporal*** clustering. + + Automated calculation of eps and minPts is also supported. ##Usage 1. Import the script. @@ -123,6 +125,12 @@ To run the algorithm you need to provide the data along with the **eps** and **m // Configure a DBSCAN instance. var dbscanner = jDBSCAN().eps(0.075).minPts(1).distance('EUCLIDEAN').data(point_data); ``` +For the automated calculation of **eps** or **minPts** or both of them, see below. +Note: **Call .data(point_data) first**; otherwise eps and minPts can't be estimated. +```javascript + // Configure a DBSCAN instance with automated Eps and MinPts. + var dbscanner = jDBSCAN().data(point_data).distance('HAVERSINE').autoMinPts().autoEps(); +``` The distance functions available are: **'EUCLIDEAN', 'HAVERSINE'** (for GPS data), **'MANHATTAN'**. Additionally you can provide your own distance function, which must accept at least two parameters (the two points), and passing it to the *distance* method. The next step is to simply run the clustering algorithm. diff --git a/example/automated_eps_minpts_example.html b/example/automated_eps_minpts_example.html new file mode 100644 index 0000000..17a9190 --- /dev/null +++ b/example/automated_eps_minpts_example.html @@ -0,0 +1,114 @@ + + + + + DBSCAN EXAMPLE + + + + + + + + +
+ + +
+
+
+
+
diff --git a/jDBSCAN.js b/jDBSCAN.js
index b4edd26..f554f7b 100644
--- a/jDBSCAN.js
+++ b/jDBSCAN.js
@@ -2,10 +2,20 @@
  Author: Corneliu S. (github.com/upphiminn)
  2013
  */
+
+ /*
+  Author AutoEPS AutoMinPts: Olezt. (github.com/olezt)
+  2016
+  */
+
 (function () {
     jDBSCAN = function () {
         //Local instance vars.
         var eps;
+        //Distance from each point to its 1st/2nd/3rd nearest neighbour; filled by autoEps.
+        var nearest = [];
+        var second_nearest = [];
+        var third_nearest = [];
         var time_eps;
         var minPts;
         var data = [];
@@ -14,7 +24,7 @@
         var graph = [];
         var distance = euclidean_distance;
         var time_distance = timestamp_distance;
-
+        var MAXdistance = 9999999999; //sentinel meaning "no neighbour found yet"
         //Utils
         function array_min(array, f) {
             var i = -1;
@@ -114,6 +124,93 @@
 
             return neighbours;
         }
+
+        /**
+         * Records, for one point, the distances to its three nearest
+         * neighbours in nearest / second_nearest / third_nearest.
+         * Used for the eps estimation. Slots with no neighbour available
+         * keep their MAXdistance-based sentinel value.
+         * @param {number} point_idx - index into data of the point to measure.
+         */
+        function count_nearest_neighbour(point_idx) {
+            var d = data[point_idx];
+            var first = MAXdistance - 2;
+            var second = MAXdistance - 1;
+            var third = MAXdistance;
+
+            for (var i = 0; i < data.length; i++) {
+                if (point_idx === i) {
+                    continue;
+                }
+                //Compare raw distances: parseInt would truncate fractional
+                //values (small EUCLIDEAN distances collapse to 0).
+                var dist = distance(data[i], d);
+                if (dist <= first) {
+                    third = second;
+                    second = first;
+                    first = dist;
+                } else if (dist <= second) {
+                    //Closer than the runner-up but not the closest.
+                    third = second;
+                    second = dist;
+                } else if (dist < third) {
+                    third = dist;
+                }
+            }
+
+            nearest[point_idx] = first;
+            second_nearest[point_idx] = second;
+            third_nearest[point_idx] = third;
+        }
+
+        /**
+         * Estimates eps as the mean of the given nearest-neighbour
+         * distances plus a fixed margin.
+         * @param {number[]} neighbour - one distance per data point.
+         * @returns {number} the estimated eps (NaN for an empty array).
+         */
+        function set_eps(neighbour) {
+            var EPS_MARGIN = 10; //add 10 for better results
+            var sum = 0;
+            for (var i = 0; i < neighbour.length; i++) {
+                //Number(), not parseInt(): keep the fractional part.
+                sum += Number(neighbour[i]);
+            }
+            return sum / neighbour.length + EPS_MARGIN;
+        }
+
+        /**
+         * Picks minPts from the size of the data set.
+         * @returns {number} 1 below 100 points, otherwise a value derived
+         *     from data.length.
+         */
+        function set_minpts() {
+            var hundreds = Math.floor(data.length / 100);
+            if (data.length < 100) {
+                return 1;
+            }
+            if (data.length < 1000) {
+                return hundreds + 1;
+            }
+            return hundreds - Math.floor(data.length / 400);
+        }
+
+        /**
+         * Selects which nearest-neighbour distances feed the eps estimate:
+         * nearest below 30 points, second_nearest below 400, third_nearest
+         * otherwise.
+         * @param {Array} neighbour - ignored; kept for the existing call shape.
+         * @returns {number[]} the selected distance array.
+         */
+        function set_neighbour(neighbour) {
+            if (data.length < 30) {
+                return nearest;
+            }
+            if (data.length < 400) {
+                return second_nearest;
+            }
+            return third_nearest;
+        }
 
         function expand_cluster(point_idx, neighbours, cluster_idx) {
             clusters[cluster_idx - 1].push(point_idx); //add point to cluster
@@ -140,7 +237,7 @@
         var dbscan = function () {
             status = [];
             clusters = [];
-
+
             for (var i = 0; i < data.length; i++) {
                 if (status[i] === undefined) {
                     status[i] = 0; //visited and marked as noise by default
@@ -155,7 +252,6 @@
                     }
                 }
             }
-
             return status;
         };
 
@@ -209,6 +305,39 @@
 
             return clusters_centers;
         };
+
+        /**
+         * Sets minPts automatically from the size of the current data set.
+         * Call .data() first.
+         * @returns {Function} the dbscan instance, for chaining.
+         */
+        dbscan.autoMinPts = function () {
+            minPts = set_minpts();
+            return dbscan;
+        };
+
+        /**
+         * Sets eps automatically from the nearest-neighbour distances of the
+         * current data set, measured with the current distance function.
+         * Call .data() (and .distance(), if needed) first.
+         * @returns {Function} the dbscan instance, for chaining.
+         */
+        dbscan.autoEps = function () {
+            //Start clean so entries left by an earlier, larger data set
+            //cannot leak into the average.
+            nearest = [];
+            second_nearest = [];
+            third_nearest = [];
+            if (data.length === 0) {
+                //Nothing to estimate from: keep the current eps rather than NaN.
+                return dbscan;
+            }
+            for (var i = 0; i < data.length; i++) {
+                count_nearest_neighbour(i);
+            }
+            eps = set_eps(set_neighbour([]));
+            return dbscan;
+        };
 
         //Getters and setters
         dbscan.data = function (d) {