Adaptive DBSCAN achievement
I am doing the DBSCAN clustering in python. I want to achieve an adaptive way to return the number of clusters by self calculating its eps and Minpts parameters. Below is my code.
import math
import copy
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
def loadDataSet(fileName, splitChar='\t'):
dataSet = []
with open(fileName) as fr:
for line in fr.readlines():
curline = line.strip().split(splitChar)
fltline = list(map(float, curline))
dataSet.append(fltline)
return dataSet
def dist(a,b):
return math.sqrt(math.pow(a[0]-b[0],2) + math.pow(a[1]-b[1],2))
def returnDk(matrix,k):
Dk = []
for i in range(len(matrix)):
Dk.append(matrix[i][k])
return Dk
def returnDkAverage(Dk):
sum = 0
for i in range(len(Dk)):
sum = sum + Dk[i]
return sum/len(Dk)
def CalculateDistMatrix(dataset):
DistMatrix = [[0 for j in range(len(dataset))] for i in range(len(dataset))]
for i in range(len(dataset)):
for j in range(len(dataset)):
DistMatrix[i][j] = dist(dataset[i], dataset[j])
return DistMatrix
def returnEpsCandidate(dataSet):
DistMatrix = CalculateDistMatrix(dataSet)
tmp_matrix = copy.deepcopy(DistMatrix)
for i in range(len(tmp_matrix)):
tmp_matrix[i].sort()
EpsCandidate = []
for k in range(1,len(dataSet)):
Dk = returnDk(tmp_matrix,k)
DkAverage = returnDkAverage(Dk)
EpsCandidate.append(DkAverage)
return EpsCandidate
def returnMinptsCandidate(DistMatrix,EpsCandidate):
MinptsCandidate = []
for k in range(len(EpsCandidate)):
tmp_eps = EpsCandidate[k]
tmp_count = 0
for i in range(len(DistMatrix)):
for j in range(len(DistMatrix[i])):
if DistMatrix[i][j] <= tmp_eps:
tmp_count = tmp_count + 1
MinptsCandidate.append(tmp_count/len(dataSet))
return MinptsCandidate
def returnClusterNumberList(dataset,EpsCandidate,MinptsCandidate):
np_dataset = np.array(dataset)
ClusterNumberList = []
for i in range(len(EpsCandidate)):
clustering = DBSCAN(eps= EpsCandidate[i],min_samples= MinptsCandidate[i]).fit(np_dataset)
num_clustering = max(clustering.labels_)
ClusterNumberList.append(num_clustering)
return ClusterNumberList
if __name__ == '__main__':
data = pd.read_csv('/Users/Desktop/Mic/recorder_test1/New folder/MFCCresultsforclustering/MFCCresultsforclustering.csv')
dataSet = data.iloc[:,0:13].values
EpsCandidate = returnEpsCandidate(dataSet)
DistMatrix = CalculateDistMatrix(dataSet)
MinptsCandidate = returnMinptsCandidate(DistMatrix,EpsCandidate)
ClusterNumberList = returnClusterNumberList(dataSet,EpsCandidate,MinptsCandidate)
print(EpsCandidate)
print(MinptsCandidate)
print('cluster number list is')
print(ClusterNumberList)
However, the output with the loading data set is all [-1]s. I am wondering where is the mistake. Am I right for this general direction? If not, how can I achieve the adaptive DBSCAN clustering?
Categories
- All Categories
- 2.3K Anaplan Community
- Academy
- Anaplan Talent Builder
- Model Design Course
- The Anaplan Way
- Archive
- 2 Idea exchange
- 62 Enterprise Scale
- 1.1K Extensibility
- 21 Intelligence
- 1.6K Planning & Modeling
- 331 Security
- Community Connections
- Connections
- Experiences
- Groups
- Personas
- Employees
- CS Toolkit
- Customer Care Center
- Forums
- Academy & Training
- Community Feedback & Updates
- Japan
- Anaplan Community Japan
- Anaplan Community Japan Knowledge Base
- HyperCare Japan
- JP-Central
- Support-Japanese
- Partners
- Partner Leadership Council
- Partner Product Council
- 724 Platform
- Anapedia
- App Hub
- Centers Of Excellence
- Extensions
- Planual
- Platform Updates
- 724 User Experience
- Profile Builder
- Resources
- Anaplan Advocates
- Anaplan Live!
- Community
- Community Advancement
- Community Connections
- Partner Program
- The Official Master Anaplanner Program
- Videos
- Welcome to the Anaplan Community!
- Success Central
- Support
- Case Portal Link
- Common Support Questions
- HyperCare Redirect
- Known Issues and Workarounds
- Support test page
- SupportFAQ
- Survey
- 2 Training Day Takeaways