GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

Orange.preprocess.SelectBestFeatures.__init__()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 5
rs 9.4286
1
import random
2
import Orange
3
import numpy as np
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
4
5
from itertools import takewhile
6
from operator import itemgetter
7
8
from Orange.preprocess.preprocess import Preprocess
9
from Orange.preprocess.score import ANOVA, GainRatio, UnivariateLinearRegression
10
from Orange.data import Domain
0 ignored issues
show
Unused Code introduced by
Unused Domain imported from Orange.data
Loading history...
11
12
__all__ = ["SelectBestFeatures", "RemoveNaNColumns", "SelectRandomFeatures"]
13
14
15
class SelectBestFeatures:
    """
    A feature selector that builds a new data set consisting of either the top
    `k` features or all those that exceed a given `threshold`. Features are
    scored using the provided feature scoring `method`. By default it is
    assumed that feature importance diminishes with decreasing scores.

    If both `k` and `threshold` are set, only features satisfying both
    conditions will be selected.

    If `method` is not set, it is automatically selected when presented with
    the data set. Data sets with both continuous and discrete features are
    scored using a method suitable for the majority of features.

    Parameters
    ----------
    method : Orange.preprocess.score.ClassificationScorer, Orange.preprocess.score.SklScorer
        Univariate feature scoring method.

    k : int
        The number of top features to select. `None` or `0` means no limit.

    threshold : float
        A threshold that a feature should meet according to the provided
        method. `None` disables thresholding; `0` is a valid threshold.

    decreasing : boolean
        The order of feature importance when sorted from the most to the least
        important feature.
    """

    def __init__(self, method=None, k=None, threshold=None, decreasing=True):
        self.method = method
        self.k = k
        self.threshold = threshold
        self.decreasing = decreasing

    def __call__(self, data):
        """
        Score the attributes of `data` and return a table restricted to the
        selected ones. Class variables and metas are carried over unchanged.

        Raises
        ------
        ValueError
            If the class variable of `data` is not an instance of the scoring
            method's `class_type`.
        """
        method = self.method
        features = data.domain.attributes

        # select default method according to the provided data
        if method is None:
            n_features = len(features)
            # A domain without attributes would otherwise divide by zero.
            discr_ratio = (sum(a.is_discrete for a in features) / n_features
                           if n_features else 0.0)
            if data.domain.has_discrete_class:
                method = GainRatio() if discr_ratio >= 0.5 else ANOVA()
            else:
                method = UnivariateLinearRegression()

        if not isinstance(data.domain.class_var, method.class_type):
            # `method` may be given either as a scorer class or an instance.
            method_cls = method if isinstance(method, type) else type(method)
            raise ValueError(
                "Scoring method {} requires a class variable of type {}."
                .format(method_cls.__name__, method.class_type.__name__))

        try:
            scores = method(data)
        except ValueError:
            # Scoring the whole table failed; fall back to scoring only the
            # attributes whose type the method declares as supported.
            scores = self.score_only_nice_features(data, method)

        best = sorted(zip(scores, features), key=itemgetter(0),
                      reverse=self.decreasing)
        if self.k:
            best = best[:self.k]
        # Compare against None so that a threshold of 0 is honoured.
        if self.threshold is not None:
            threshold = self.threshold
            if self.decreasing:
                best = takewhile(lambda x: x[0] >= threshold, best)
            else:
                best = takewhile(lambda x: x[0] <= threshold, best)

        domain = Orange.data.Domain([f for _, f in best],
                                    data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)

    def score_only_nice_features(self, data, method):
        """
        Score each attribute that is an instance of `method.feature_type`
        individually and return one score per attribute of `data`.

        Attributes of any other type receive the worst possible score
        (`-inf` when `decreasing` is set, `inf` otherwise), so they sort
        after every scored attribute.
        """
        attributes = data.domain.attributes
        # Explicit bool dtype keeps the mask usable when there are no attributes.
        mask = np.array([isinstance(a, method.feature_type)
                         for a in attributes], dtype=bool)
        scores = [method(data, a)
                  for a, supported in zip(attributes, mask) if supported]
        bad = float('-inf') if self.decreasing else float('inf')
        all_scores = np.full(len(attributes), bad)
        all_scores[mask] = scores
        return all_scores
101
102
103
class SelectRandomFeatures:
    """
    A feature selector that selects random `k` features from an input
    data set and returns a data set with selected features. Parameter
    `k` is either an integer (number of features) or float (from 0.0 to
    1.0, proportion of retained features).

    Parameters
    ----------

    k : int or float (default = 0.1)
        The number or proportion of features to retain.
    """

    def __init__(self, k=0.1):
        self.k = k

    def __call__(self, data):
        """
        Return a table with a random subset of the attributes of `data`.
        Class variables and metas are carried over unchanged.

        A float `k` is converted to a feature count for this call only (the
        product is truncated towards zero); `self.k` is left untouched so the
        same selector can be reused on data sets of different widths.
        """
        attributes = data.domain.attributes
        if isinstance(self.k, float):
            n_selected = int(len(attributes) * self.k)
        else:
            n_selected = self.k
        domain = Orange.data.Domain(
            random.sample(attributes, n_selected),
            data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)
127
128
129
class RemoveNaNColumns(Preprocess):
    """
    Drops every attribute column whose values are all unknown (NaN) and
    returns the resulting data set. Only the attribute matrix `data.X` is
    inspected; class variables and metas are passed through unchanged.

    data : data table
        an input data table
    """
    def __call__(self, data):
        # One boolean per column of X: True when the whole column is NaN.
        all_unknown = np.isnan(data.X).all(axis=0)
        kept = [
            attribute
            for attribute, is_empty in zip(data.domain.attributes, all_unknown)
            if not is_empty
        ]
        reduced_domain = Orange.data.Domain(
            kept, data.domain.class_vars, data.domain.metas)
        return Orange.data.Table(reduced_domain, data)
143