Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

##Additional Features

The algorithm also works on data points given by ***GPS coordinates***. Moreover, the algorithm can be used on data with a time dimension also. This allows for ***spatio-temporal*** clustering.
The algorithm also works on data points given by ***GPS coordinates***. Moreover, the algorithm can be used on data with a time dimension also. This allows for ***spatio-temporal*** clustering.

Automated calculation of eps and minPts is also supported.

##Usage
1. Import the script.
Expand Down Expand Up @@ -123,6 +125,12 @@ To run the algorithm you need to provide the data along with the **eps** and **m
// Configure a DBSCAN instance.
var dbscanner = jDBSCAN().eps(0.075).minPts(1).distance('EUCLIDEAN').data(point_data);
```
For automated calculation of **eps**, **minPts**, or both, see below.
Note: **Call .data(point_data) first**; otherwise eps and minPts cannot be estimated.
```javascript
// Configure a DBSCAN instance with automated Eps and MinPts.
var dbscanner = jDBSCAN().data(point_data).distance('HAVERSINE').autoMinPts().autoEps();
```
The distance functions available are: **'EUCLIDEAN', 'HAVERSINE'** (for GPS data), **'MANHATTAN'**.

Additionally you can provide your own distance function, which must accept at least two parameters (the two points), and passing it to the *distance* method. The next step is to simply run the clustering algorithm.
Expand Down
114 changes: 114 additions & 0 deletions example/automated_eps_minpts_example.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>DBSCAN EXAMPLE</title>
<script src="http://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script type="text/javascript" src="../jDBSCAN.js"></script>


<style>
.node {
stroke: #fff;
stroke-width: 1.5px;
}

.link {
stroke: #999;
stroke-opacity: .6;
}
</style>
</head>

<body>
<div id="content_wrapper">
<input type="button" value="Reset" id='reset_btn'/>
<input type="button" value="Run DBSCAN" id='dbscan_btn'/>
</div>
</body>

<script>

var width = 600;
var height = 600;

/**
 * Adds up three independent uniform samples, each rescaled from [0, 1) to
 * [-1, 1), and returns the total (so the result lies in [-3, 3)).
 */
function rnd_snd() {
    var total = 0;
    for (var draw = 0; draw < 3; draw++) {
        total += Math.random() * 2 - 1;
    }
    return total;
}

/**
 * Draws one value from rnd_snd(), scales it by stdev, shifts it by mean and
 * rounds the outcome to the nearest integer.
 *
 * @param {number} mean  offset added to the scaled sample
 * @param {number} stdev factor the raw sample is multiplied by
 * @returns {number} the rounded result
 */
function rnd(mean, stdev) {
    var sample = rnd_snd();
    var shifted = sample * stdev + mean;
    return Math.round(shifted);
}

/**
 * Builds the demo data set: three random cluster centres, each surrounded by
 * thirty points scattered around it with rnd().
 *
 * Reads the page-level width and height to keep centres at least 30 units
 * away from the right and bottom edges.
 *
 * @returns {Array} 90 points of the form {x, y}
 */
function generate_cluster_data() {
    var cluster_count = 3;
    var points_per_cluster = 30;
    var spread = {x: 10, y: 15};

    // All centres are chosen before any point is scattered.
    var centers = [];
    while (centers.length < cluster_count) {
        centers.push({
            x: Math.random() * (width - 30),
            y: Math.random() * (height - 30)
        });
    }

    var points = [];
    for (var c = 0; c < centers.length; c++) {
        var center = centers[c];
        for (var p = 0; p < points_per_cluster; p++) {
            points.push({
                x: rnd(center.x, spread.x),
                y: rnd(center.y, spread.y)
            });
        }
    }

    return points;
}

/**
 * Appends one red circle of radius 5 to the page-level svg for every entry
 * in the enter selection of ".node" joined with the given points, positioned
 * at the point's x / y.
 *
 * @param {Array} points objects carrying numeric x and y properties
 */
function plot_points(points) {
    var x_of = function (d) {
        return d.x;
    };
    var y_of = function (d) {
        return d.y;
    };

    var entering = svg.selectAll(".node").data(points).enter();

    entering.append("circle")
        .attr("class", "node")
        .attr("r", 5)
        .style("fill", '#a30500')
        .attr('cx', x_of)
        .attr('cy', y_of);
}

// Drawing surface: an <svg> appended to <body>, sized width x height.
var svg = d3.select("body").append("svg");
svg.attr("width", width);
svg.attr("height", height);

// Initial data set; logged for inspection and drawn immediately.
var raw_point_data = generate_cluster_data();
console.log('Raw point data input', raw_point_data);
plot_points(raw_point_data);


// "Run DBSCAN" button: cluster the current points with automatically
// estimated parameters and recolour every node by its assigned cluster.
d3.select('#dbscan_btn').on('click', function () {
    // data() comes first in the chain: autoMinPts() and autoEps() estimate
    // their values from the data already stored on the instance.
    var dbscanner = jDBSCAN().data(raw_point_data).distance('EUCLIDEAN').autoMinPts().autoEps();
    var point_assignment_result = dbscanner();
    console.log('Resulting DBSCAN output', point_assignment_result);

    // point_assignment_result[i] is the cluster assigned to raw_point_data[i].
    point_assignment_result.forEach(function (d, i) {
        raw_point_data[i].cluster = d;
    });

    // d3.range expects numeric (start, stop) bounds with an exclusive stop,
    // so "+ 1" is needed to include the highest cluster id. Passing an array
    // (as before) coerces to NaN and produces an empty domain.
    var cluster_ids = d3.range(0, d3.max(point_assignment_result) + 1);
    var color = d3.scale.category20().domain(cluster_ids);
    d3.selectAll('.node')
        .data(raw_point_data)
        .style('fill', function (d) {
            return color(d.cluster);
        });
});


// "Reset" button: discard the drawn nodes, generate a fresh data set and
// draw it.
d3.select('#reset_btn').on('click', function () {
    svg.selectAll('.node').remove();
    // Deliberately no `var`: this must overwrite the page-level
    // raw_point_data that the "Run DBSCAN" handler reads. Declaring a local
    // here would leave that handler clustering the previous points while the
    // new ones are on screen.
    raw_point_data = generate_cluster_data();
    console.log('Raw point data input', raw_point_data);
    plot_points(raw_point_data);
});

</script>
</html>
84 changes: 81 additions & 3 deletions jDBSCAN.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,19 @@
Author: Corneliu S. (github.com/upphiminn)
2013
*/

/*
Author AutoEPS AutoMinPts: Olezt. (github.com/olezt)
2016
*/

(function () {
jDBSCAN = function () {
//Local instance vars.
var eps;
var nearest = [];
var second_nearest = [];
var third_nearest = [];
var time_eps;
var minPts;
var data = [];
Expand All @@ -14,7 +23,7 @@
var graph = [];
var distance = euclidean_distance;
var time_distance = timestamp_distance;

var MAXdistance=9999999999;
//Utils
function array_min(array, f) {
var i = -1;
Expand Down Expand Up @@ -114,6 +123,61 @@

return neighbours;
}

/**
 * Finds the three smallest distances from data[point_idx] to every other
 * point and stores them, in ascending order, in nearest[point_idx],
 * second_nearest[point_idx] and third_nearest[point_idx].
 * Used for the eps estimation.
 *
 * When fewer than three other points exist, the unfilled slots keep their
 * initial placeholders (MAXdistance - 2, MAXdistance - 1, MAXdistance).
 *
 * @param {number} point_idx index into data of the point to measure from
 */
function count_nearest_neighbour(point_idx) {
    var origin = data[point_idx];
    var best = MAXdistance - 2;
    var second = MAXdistance - 1;
    var third = MAXdistance;

    for (var i = 0; i < data.length; i++) {
        if (i === point_idx) {
            continue;
        }

        // Compare the raw distance: truncating it to an integer would make
        // all sub-unit distances look identical.
        var dist = distance(data[i], origin);

        // Insert dist into the sorted (best, second, third) triple so that a
        // value landing between two slots is not lost.
        if (dist < best) {
            third = second;
            second = best;
            best = dist;
        } else if (dist < second) {
            third = second;
            second = dist;
        } else if (dist < third) {
            third = dist;
        }
    }

    nearest[point_idx] = best;
    second_nearest[point_idx] = second;
    third_nearest[point_idx] = third;
}

/**
 * Automatically estimates eps as the mean of the supplied neighbour
 * distances plus a fixed margin.
 *
 * @param {Array} neighbour one neighbour distance per data point
 * @param {number} [margin=10] amount added to the mean; the default of 10 is
 *        the original hard-coded heuristic ("add 10 for better results") and
 *        is expressed in the same unit as the distances, so callers working
 *        at a different scale can pass their own value
 * @returns {number} the estimated eps; just the margin when neighbour is empty
 */
function set_eps(neighbour, margin) {
    if (margin === undefined) {
        margin = 10;
    }
    // Avoid 0 / 0 = NaN when there is nothing to average.
    if (neighbour.length === 0) {
        return margin;
    }

    var sum = 0;
    for (var i = 0; i < neighbour.length; i++) {
        // Number() keeps fractional distances intact. parseInt would truncate
        // them and misread exponent notation (parseInt(1e-7) is 1).
        sum += Number(neighbour[i]);
    }
    return sum / neighbour.length + margin;
}

/**
 * Automatically derives minPts from the number of data points n:
 *   n < 100   -> 1
 *   n < 1000  -> floor(n / 100) + 1
 *   otherwise -> floor(n / 100) - floor(n / 400)
 *
 * @returns {number} the derived minPts
 */
function set_minpts() {
    var n = data.length;
    if (n < 100) {
        return 1;
    }

    var per_hundred = Math.floor(n / 100);
    if (n < 1000) {
        return per_hundred + 1;
    }

    return per_hundred - Math.floor(n / 400);
}

/**
 * Picks which per-point distance table feeds the eps estimate, based on the
 * number of data points: nearest below 30 points, second_nearest below 400,
 * third_nearest otherwise.
 *
 * @param {Array} neighbour not read; kept so existing callers keep working
 * @returns {Array} the selected table (the instance array itself, not a copy)
 */
function set_neighbour(neighbour) {
    var n = data.length;
    return n < 30
        ? nearest
        : (n < 400 ? second_nearest : third_nearest);
}

function expand_cluster(point_idx, neighbours, cluster_idx) {
clusters[cluster_idx - 1].push(point_idx); //add point to cluster
Expand All @@ -140,7 +204,7 @@
var dbscan = function () {
status = [];
clusters = [];

for (var i = 0; i < data.length; i++) {
if (status[i] === undefined) {
status[i] = 0; //visited and marked as noise by default
Expand All @@ -155,7 +219,6 @@
}
}
}

return status;
};

Expand Down Expand Up @@ -209,6 +272,21 @@

return clusters_centers;
};

/**
 * Sets minPts to the value set_minpts() derives from the data currently
 * stored on this instance.
 *
 * @returns {Function} dbscan, so calls can be chained
 */
dbscan.autoMinPts = function () {
    var estimated = set_minpts();
    minPts = estimated;
    return dbscan;
};

/**
 * Sets eps automatically: measures the nearest-neighbour distances of every
 * point in the current data with the current distance function, selects the
 * table matching the data size and averages it via set_eps().
 *
 * @returns {Function} dbscan, so calls can be chained
 */
dbscan.autoEps = function () {
    // Empty the per-point tables first. They live for the lifetime of the
    // instance and set_eps() averages over their full length, so entries left
    // from an earlier, larger data set would otherwise skew the estimate.
    nearest.length = 0;
    second_nearest.length = 0;
    third_nearest.length = 0;

    for (var i = 0; i < data.length; i++) {
        count_nearest_neighbour(i);
    }

    // set_neighbour() chooses purely by data.length; it needs no input array.
    var neighbour = set_neighbour();
    eps = set_eps(neighbour);
    return dbscan;
};

//Getters and setters
dbscan.data = function (d) {
Expand Down