欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

基于密度聚类DBSCAN

程序员文章站 2022-07-03 11:42:58
...
/*
DBSCAN Algorithm
15S103182
Ethan
*/
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <ctime>
#include <cstdlib>
#include <limits>
#include <cmath>
#include <stack>
using namespace std;
class point{
public:
	float x;
	float y;
	int cluster = 0;
	int pointType = 1;//1 noise 2 border 3 core
	int pts = 0;//points in MinPts 
	vector<int> corepts;
	int visited = 0;
	point(){}
	point(float a, float b, int c){
		x = a;
		y = b;
		cluster = c;
	}
};
float stringToFloat(string i){
	stringstream sf;
	float score = 0;
	sf << i;
	sf >> score;
	return score;
}
vector<point> openFile(const char* dataset){
	fstream file;
	file.open(dataset, ios::in);
	if (!file)
	{
		cout << "Open File Failed!" << endl;
		vector<point> a;
		return a;
	}
	vector<point> data;
	int i = 1;
	while (!file.eof()){
		string temp;
		file >> temp;
		int split = temp.find(',', 0);
		point p(stringToFloat(temp.substr(0, split)), stringToFloat(temp.substr(split + 1, temp.length() - 1)), i++);
		data.push_back(p);
	}
	file.close();
	cout << "successful!" << endl;
	return data;
}
float squareDistance(point a, point b){
	return sqrt((a.x - b.x)*(a.x - b.x) + (a.y - b.y)*(a.y - b.y));
}
void DBSCAN(vector<point> dataset, float Eps, int MinPts){
	int len = dataset.size();
	//calculate pts
	cout << "calculate pts" << endl;
	for (int i = 0; i<len; i++){
		for (int j = i + 1; j<len; j++){
			if (squareDistance(dataset[i], dataset[j])<Eps)
				dataset[i].pts++;
			dataset[j].pts++;
		}
	}
	//core point 
	cout << "core point " << endl;
	vector<point> corePoint;
	for (int i = 0; i<len; i++){
		if (dataset[i].pts >= MinPts) {
			dataset[i].pointType = 3;
			corePoint.push_back(dataset[i]);
		}
	}
	cout << "joint core point" << endl;
	//joint core point
	for (int i = 0; i<corePoint.size(); i++){
		for (int j = i + 1; j<corePoint.size(); j++){
			if (squareDistance(corePoint[i], corePoint[j])<Eps){
				corePoint[i].corepts.push_back(j);
				corePoint[j].corepts.push_back(i);
			}
		}
	}
	for (int i = 0; i<corePoint.size(); i++){
		stack<point*> ps;
		if (corePoint[i].visited == 1)
			continue;
		ps.push(&corePoint[i]);
		point *v;
		while (!ps.empty()){
			v = ps.top();
			v->visited = 1;
			ps.pop();
			for (int j = 0; j<v->corepts.size(); j++){
				if (corePoint[v->corepts[j]].visited == 1) continue;
				corePoint[v->corepts[j]].cluster = corePoint[i].cluster;
				corePoint[v->corepts[j]].visited = 1;
				ps.push(&corePoint[v->corepts[j]]);
			}
		}
	}
	cout << "border point,joint border point to core point" << endl;
	//border point,joint border point to core point
	for (int i = 0; i<len; i++){
		if (dataset[i].pointType == 3) continue;
		for (int j = 0; j<corePoint.size(); j++){
			if (squareDistance(dataset[i], corePoint[j])<Eps) {
				dataset[i].pointType = 2;
				dataset[i].cluster = corePoint[j].cluster;
				break;
			}
		}
	}
	cout << "output" << endl;
	//output
	fstream clustering;
	clustering.open("clustering.txt", ios::out);
	for (int i = 0; i<len; i++){
		if (dataset[i].pointType == 2){
			clustering << dataset[i].x << "," << dataset[i].y << "," << dataset[i].cluster << "\n";
			//cout << dataset[i].x << "," << dataset[i].y << "," << dataset[i].cluster << "\n";
		}
	}
	for (int i = 0; i<corePoint.size(); i++){
		clustering << corePoint[i].x << "," << corePoint[i].y << "," << corePoint[i].cluster << "\n";
		cout << corePoint[i].x << "," << corePoint[i].y << "," << corePoint[i].cluster << "\n";
	}
	clustering.close();
}
int main(int argc, char** argv) {
	vector<point> dataset = openFile("dataset3.txt");    //坐标集
	DBSCAN(dataset, 2, 5);
	return 0;
}