1
|
|
|
import numpy as np |
|
|
|
|
2
|
|
|
import sklearn.cluster as skl_cluster |
|
|
|
|
3
|
|
|
from sklearn.metrics import silhouette_score |
|
|
|
|
4
|
|
|
|
5
|
|
|
from Orange.data import Table, DiscreteVariable, Domain, Instance |
6
|
|
|
from Orange.projection import SklProjector, Projection |
7
|
|
|
from Orange.distance import Euclidean |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
__all__ = ["KMeans"] |
11
|
|
|
|
12
|
|
|
|
13
|
|
|
class KMeans(SklProjector):
    """k-means clustering backed by ``sklearn.cluster.KMeans``.

    Calling :meth:`fit` trains the scikit-learn estimator, attaches a few
    quality measures to it and returns the result wrapped in a
    :class:`KMeansModel`.
    """

    __wraps__ = skl_cluster.KMeans

    def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
                 tol=0.0001, random_state=None, preprocessors=None):
        """Store the estimator settings.

        All arguments except ``preprocessors`` share their names with
        ``sklearn.cluster.KMeans`` constructor parameters; ``preprocessors``
        is handed to the base-class constructor.
        """
        super().__init__(preprocessors=preprocessors)
        # NOTE(review): vars() captures every local of this frame, i.e. also
        # ``self``, ``preprocessors`` and ``__class__``. Presumably the base
        # class filters these before they reach scikit-learn - confirm.
        # Do not introduce extra locals above this line.
        self.params = vars()

    def fit(self, X, Y=None):
        """Fit k-means on ``X`` and return a :class:`KMeansModel`.

        ``X`` may be an Orange ``Table`` (its ``.X`` matrix is used) or any
        array-like accepted by scikit-learn. ``Y`` is passed straight to the
        estimator's ``fit``.

        The fitted estimator gets three extra attributes:

        * ``silhouette`` - silhouette score of the fitted labelling, or NaN
          when the score is undefined (see below);
        * ``inertia`` - ``inertia_`` divided by ``len(X)``;
        * ``inter_cluster`` - mean of the strictly-upper-triangle entries of
          ``Euclidean(cluster_centers_)``.
        """
        # Tables carry their feature matrix in ``.X``; everything else is
        # passed through unchanged.
        features = X.X if isinstance(X, Table) else X
        proj = skl_cluster.KMeans(**self.params).fit(features, Y)

        # silhouette_score raises ValueError unless the number of distinct
        # labels lies in [2, n_samples - 1]; store NaN instead of crashing
        # (e.g. for n_clusters=1 or one cluster per sample).
        n_labels = len(np.unique(proj.labels_))
        if 1 < n_labels < len(proj.labels_):
            proj.silhouette = silhouette_score(features, proj.labels_)
        else:
            proj.silhouette = np.nan

        proj.inertia = proj.inertia_ / len(X)
        cluster_dist = Euclidean(proj.cluster_centers_)
        # Offset 1 skips the diagonal, so each centre pair is counted once.
        proj.inter_cluster = np.mean(
            cluster_dist[np.triu_indices_from(cluster_dist, 1)])
        return KMeansModel(proj, self.preprocessors)
33
|
|
|
|
34
|
|
|
|
35
|
|
|
class KMeansModel(Projection):
    """Fitted k-means model that assigns cluster ids to new data."""

    def __init__(self, proj, preprocessors=None):
        """Wrap the fitted estimator ``proj``.

        ``preprocessors`` is accepted but not used in this constructor.
        """
        super().__init__(proj=proj)
        self.k = self.proj.get_params()["n_clusters"]
        self.centroids = self.proj.cluster_centers_

    def _cluster_domain(self):
        """Return a single-variable domain holding the discrete cluster id."""
        cluster_var = DiscreteVariable(name='Cluster id',
                                       values=range(self.k))
        return Domain([cluster_var])

    def __call__(self, data):
        """Predict the cluster of every row in ``data``.

        * ``Table`` - returns a ``Table`` with one 'Cluster id' column.
        * ``Instance`` - returns a one-row ``Table`` of the same form.
        * anything else - returns the raw predictions reshaped to a column.

        Tables and instances whose domain is not ``self.pre_domain`` are
        converted to it first.
        """
        # NOTE(review): ``pre_domain`` is not assigned anywhere in this
        # class; presumably the projector machinery sets it - confirm.
        if not isinstance(data, (Table, Instance)):
            return self.proj.predict(data).reshape((len(data), 1))

        if isinstance(data, Table):
            if data.domain is not self.pre_domain:
                data = Table(self.pre_domain, data)
            out_domain = self._cluster_domain()
            labels = self.proj.predict(data.X).astype(int)
            return Table(out_domain, labels.reshape((len(data), 1)))

        # Single instance: predict from its feature vector and lift the
        # result to 2-D so it forms a one-row table.
        if data.domain is not self.pre_domain:
            data = Instance(self.pre_domain, data)
        out_domain = self._cluster_domain()
        labels = np.atleast_2d(self.proj.predict(data._x)).astype(int)
        return Table(out_domain, labels)
60
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a Pylint configuration issue. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place an `__init__.py` file in each sub-folder.