Issues in clustering.py (master) - Issues in master - hugobuddel/orange3 - Measure and Improve Code Quality continuously with Scrutinizer

Issues (4082)

Orange/evaluation/clustering.py (5 issues)

Labels

Severity

import numpy as np
# NOTE: Scrutinizer configuration hint shown by the issue viewer (not Python):
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
from sklearn.metrics import silhouette_score, adjusted_mutual_info_score, silhouette_samples
# NOTE: Scrutinizer configuration hint shown by the issue viewer (not Python):
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from Orange.data import Table
from Orange.evaluation.testing import Results
from Orange.evaluation.scoring import Score


__all__ = ['ClusteringEvaluation']


class ClusteringResults(Results):
    """Results container used for clustering experiments."""

    def __init__(self, store_data=True, **kwargs):
        # NOTE: `store_data` is accepted in the signature, but the base
        # class is always initialised with ``store_data=True``.
        super().__init__(store_data=True, **kwargs)

    def get_fold(self, fold):
        """
        Return a new `ClusteringResults` restricted to a single fold.

        :param fold:
            Index used to select the fold from `self.models` and from the
            second axis of `self.predicted`.
        :return:
            A `ClusteringResults` instance sharing `data`, `row_indices`,
            `actual` and `domain` with this instance.
        :raises ValueError:
            If this instance has no folds (`self.folds` is None).
        """
        fold_results = ClusteringResults()
        fold_results.data = self.data

        if self.folds is None:
            raise ValueError("This 'Results' instance does not have folds.")

        stored_models = self.models
        if stored_models is not None:
            fold_results.models = stored_models[fold]

        # Attributes that are the same for every fold are passed on as-is.
        for shared in ("row_indices", "actual"):
            setattr(fold_results, shared, getattr(self, shared))

        # Only the predictions are actually sliced by fold.
        fold_results.predicted = self.predicted[:, fold, :]
        fold_results.domain = self.domain
        return fold_results


class ClusteringScore(Score):
    """Base class for scores computed from predicted cluster labels."""

    # True: compare predictions against `results.actual`.
    # False: score predictions against the data matrix `results.data.X`.
    considers_actual = False

    def from_predicted(self, results, score_function):
        """
        Apply `score_function` to every entry of `results.predicted`.

        :param results:
            Object providing `predicted` and, depending on
            `considers_actual`, either `actual` or `data.X`.
        :param score_function:
            Callable invoked as ``score_function(reference, labels)``, where
            `labels` is one flattened entry of `results.predicted`.
        :return:
            A float64 array with one score per entry of `results.predicted`.
        """
        if self.considers_actual:
            # Clustering scores from labels
            def reference():
                return results.actual.flatten()
        else:
            # Clustering scores from data only
            def reference():
                return results.data.X

        # The reference is fetched lazily, once per scored entry.
        per_entry = (score_function(reference(), labels.flatten())
                     for labels in results.predicted)
        return np.fromiter(per_entry, dtype=np.float64,
                           count=len(results.predicted))


class Silhouette(ClusteringScore):
    """Silhouette score computed from the data and the predicted labels."""

    separate_folds = True

    def compute_score(self, results):
        """Return `silhouette_score` for every entry of `results.predicted`."""
        scores = self.from_predicted(results, silhouette_score)
        return scores


class AdjustedMutualInfoScore(ClusteringScore):
    """Adjusted mutual information between actual and predicted labels."""

    # This score compares predictions against `results.actual`.
    considers_actual = True
    separate_folds = True

    def compute_score(self, results):
        """Return `adjusted_mutual_info_score` for every entry of `results.predicted`."""
        scores = self.from_predicted(results, adjusted_mutual_info_score)
        return scores


class ClusteringEvaluation(ClusteringResults):
    """
    Clustering evaluation.

    If the constructor is given the data and a list of learning algorithms, it
    runs clustering and returns an instance of `Results` containing the
    predicted clustering labels.

    .. attribute:: k
        The number of runs.

    .. attribute:: predicted
        Array of shape ``(len(learners), k, len(data))`` holding the labels
        produced by each learner in each run.

    .. attribute:: actual
        Flattened ``data.Y`` if `data` has a ``Y`` attribute, otherwise None.

    """
    def __init__(self, data, learners, k=1,
                 store_models=False):
        """
        Fit every learner on `data` `k` times and store the predicted labels.

        :param data:
            Data passed to each learner and to each fitted model.
        :param learners:
            Sequence of callables; ``learner(data)`` must return a model and
            ``model(data)`` an object whose ``X`` holds the labels.
        :param k:
            The number of runs.
        :param store_models:
            If true, the fitted models are kept in `self.models` as one list
            per run.
        """
        super().__init__(data=data, nmethods=len(learners), store_data=True,
                         store_models=store_models, predicted=None)

        self.k = k
        self.predicted = np.empty((len(learners), self.k, len(data)))
        self.folds = range(k)
        self.row_indices = np.arange(len(data))
        # Data without a `Y` attribute is supported; there is simply no
        # ground truth to compare against in that case.
        self.actual = data.Y.flatten() if hasattr(data, "Y") else None

        if self.store_models:
            self.models = []

        # `run` (rather than `k`) avoids shadowing the constructor argument.
        for run in range(self.k):
            fold_models = []
            if self.store_models:
                self.models.append(fold_models)

            for i, learner in enumerate(learners):
                model = learner(data)
                if self.store_models:
                    fold_models.append(model)

                labels = model(data)
                self.predicted[i, run, :] = labels.X.flatten()



def graph_silhouette(X, y, xlim=None, colors=None, figsize=None, filename=None):
    """
    Silhouette plot.

    Draws one group of horizontal bars per cluster. Clusters are ordered by
    their sorted label and, within a cluster, samples are ordered by
    increasing silhouette coefficient.

    :param X Orange.data.Table or numpy.ndarray:
        Data table.
    :param y Orange.data.Table or numpy.ndarray:
        Cluster labels (integers).
    :param xlim tuple (float, float):
        Limit x-axis values.
    :param colors list, optional (default = None):
        List of colors. If provided, its length must equal the number of
        clusters; otherwise a message is written to stderr and nothing is
        plotted.
    :param figsize tuple (float, float):
        Figure size (width, height) in inches.
    :param filename:
        Output file name. If given, the figure is saved and closed;
        otherwise it is shown.
    :return:
        None.
    """
    import matplotlib.pyplot as plt

    if isinstance(X, Table):
        X = X.X
    if isinstance(y, Table):
        y = y.X
    # asarray lets plain sequences of labels through as well.
    y = np.asarray(y).ravel()

    # Detect clusters and set colors
    labels = np.unique(y)
    N = len(labels)
    if colors is None:
        colors = ["g" if i % 2 else "b" for i in range(N)]
    elif len(colors) != N:
        import sys
        sys.stderr.write("Number of colors does not match the number of clusters. \n")
        return

    # Silhouette coefficients, grouped by cluster and sorted within each
    # cluster. Selecting by label (instead of slicing by cluster sizes) stays
    # correct for any number of clusters and for labels that are not 0..N-1.
    s = silhouette_samples(X, y)
    parts = [(np.sort(s[y == label]), label) for label in labels]

    # Show integral labels (e.g. 1.0 coming from a Table) as plain integers.
    if (np.issubdtype(labels.dtype, np.number)
            and np.all(labels == np.round(labels))):
        tick_labels = labels.astype(int)
    else:
        tick_labels = labels

    # Plot data
    plt.figure(figsize=figsize if figsize else None)
    plt.title("Silhouette score")
    total = 0
    centers = []
    for color, (scores, _) in zip(colors, parts):
        plt.barh(range(total, total + len(scores)),
                 scores, color=color, edgecolor=color)
        # Tick position in the middle of this cluster's bars.
        centers.append(total + len(scores) / 2)
        total += len(scores)
    if xlim is not None:
        plt.xlim(xlim)
    plt.yticks(centers)
    plt.gca().set_yticklabels(tick_labels)
    plt.ylabel("Cluster label")
    if filename:
        plt.savefig(filename)
        plt.close()
    else:
        plt.show()


1			import numpy as np
			0 ignored issues – show Configuration introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
2			from sklearn.metrics import silhouette_score, adjusted_mutual_info_score, silhouette_samples
			0 ignored issues – show Configuration introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The import `sklearn.metrics` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
3
4			from Orange.data import Table
5			from Orange.evaluation.testing import Results
6			from Orange.evaluation.scoring import Score
7
8
9			__all__ = ['ClusteringEvaluation']
10
11
12			class ClusteringResults(Results):
13			def __init__(self, store_data=True, **kwargs):
14			super().__init__(store_data=True, **kwargs)
15
16			def get_fold(self, fold):
17			results = ClusteringResults()
18			results.data = self.data
19
20			if self.folds is None:
21			raise ValueError("This 'Results' instance does not have folds.")
22
23			if self.models is not None:
24			results.models = self.models[fold]
25
26			results.row_indices = self.row_indices
27			results.actual = self.actual
28			results.predicted = self.predicted[:, fold, :]
29			results.domain = self.domain
30			return results
31
32
33			class ClusteringScore(Score):
34			considers_actual = False
35
36			def from_predicted(self, results, score_function):
			0 ignored issues – show Bug introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this Arguments number differs from overridden 'from_predicted' method Loading history...
37			# Clustering scores from labels
38			if self.considers_actual:
39			return np.fromiter(
40			(score_function(results.actual.flatten(), predicted.flatten())
41			for predicted in results.predicted),
42			dtype=np.float64, count=len(results.predicted))
43			# Clustering scores from data only
44			else:
45			return np.fromiter(
46			(score_function(results.data.X, predicted.flatten())
47			for predicted in results.predicted),
48			dtype=np.float64, count=len(results.predicted))
49
50
51			class Silhouette(ClusteringScore):
52			separate_folds = True
53
54			def compute_score(self, results):
55			return self.from_predicted(results, silhouette_score)
56
57
58			class AdjustedMutualInfoScore(ClusteringScore):
59			separate_folds = True
60			considers_actual = True
61
62			def compute_score(self, results):
63			return self.from_predicted(results, adjusted_mutual_info_score)
64
65
66			class ClusteringEvaluation(ClusteringResults):
67			"""
68			Clustering evaluation.
69
70			If the constructor is given the data and a list of learning algorithms, it
71			runs clustering and returns an instance of `Results` containing the
72			predicted clustering labels.
73
74			.. attribute:: k
75			The number of runs.
76
77			"""
78			def __init__(self, data, learners, k=1,
79			store_models=False):
80			super().__init__(data=data, nmethods=len(learners), store_data=True,
81			store_models=store_models, predicted=None)
82
83			self.k = k
84			Y = data.Y.copy().flatten()
			0 ignored issues – show Unused Code introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The variable `Y` seems to be unused. Loading history...
85
86			self.predicted = np.empty((len(learners), self.k, len(data)))
87			self.folds = range(k)
88			self.row_indices = np.arange(len(data))
89			self.actual = data.Y.flatten() if hasattr(data, "Y") else None
90
91			if self.store_models:
92			self.models = []
93
94			for k in range(self.k):
95
96			if self.store_models:
97			fold_models = []
98			self.models.append(fold_models)
99
100			for i, learner in enumerate(learners):
101			model = learner(data)
102			if self.store_models:
103			fold_models.append(model)
104
105			labels = model(data)
106			self.predicted[i, k, :] = labels.X.flatten()
107
108
109
110			def graph_silhouette(X, y, xlim=None, colors=None, figsize=None, filename=None):
111			"""
112			Silhouette plot.
113			:param filename:
114			Output file name.
115			:param X Orange.data.Table or numpy.ndarray
116			Data table.
117			:param y Orange.data.Table or numpy.ndarray:
118			Cluster labels (integers).
119			:param colors list, optional (default = None):
120			List of colors. If provided, it must equal the number of clusters.
121			:param figsize tuple (float, float):
122			Figure size (width, height) in inches.
123			:param xlim tuple (float, float):
124			Limit x-axis values.
125			"""
126			import matplotlib.pyplot as plt
			0 ignored issues – show Configuration introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The import `matplotlib.pyplot` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
127
128			if isinstance(X, Table):
129			X = X.X
130			if isinstance(y, Table):
131			y = y.X
132			y = y.ravel()
133
134			# Detect number of clusters and set colors
135			N = len(set(y))
136			if isinstance(colors, type(None)) :
137			colors = ["g" if i % 2 else "b" for i in range(N)]
138			elif len(colors) != N:
139			import sys
140			sys.stderr.write("Number of colors does not match the number of clusters. \n")
141			return
142
143			# Silhouette coefficients
144			s = silhouette_samples(X, y)
145			s = s[np.argsort(y)] # Sort by clusters
146			parts = []
147			# Within clusters sort by silhouette scores
148			for label, (i, j) in enumerate([(sum(y == c1), sum(y == c1) + sum(y == c2))
149			for c1, c2 in zip(range(-1, N-1), range(0, N))]):
150			scores = sorted(s[i:j])
151			parts.append((scores, label))
152
153			# Plot data
154			if figsize:
155			plt.figure(figsize=figsize)
156			else:
157			plt.figure()
158			plt.title("Silhouette score")
159			total = 0
160			centers = []
161			for i, (scores, label) in enumerate(parts):
162			plt.barh(range(total, total + len(scores)),
163			scores, color=colors[i], edgecolor=colors[i])
164			centers.append(total+len(scores)/2)
165			total += len(scores)
166			if not isinstance(xlim, type(None)):
167			plt.xlim(xlim)
168			plt.yticks(centers)
169			plt.gca().set_yticklabels(range(N))
170			plt.ylabel("Cluster label")
171			if filename:
172			plt.savefig(filename)
173			plt.close()
174			else:
175			plt.show()
176

GitHub Access Token became invalid

Issues (4082)

Orange/evaluation/clustering.py (5 issues)

Labels

Severity

Introduced By

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

hugobuddel / orange3

GitHub Access Token became invalid

Issues (4082)

Orange/evaluation/clustering.py (5 issues)

Labels

Severity

Introduced By

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files