GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

Issues (4082)

Orange/statistics/contingency.py (10 issues)

1
import math
2
import numpy as np
0 ignored issues
show
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue with Pylint. Make sure that your libraries are available by adding the necessary install commands to your configuration.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place an __init__.py file in each sub-folder.

Loading history...
3
from Orange import data
4
5
6
def _get_variable(variable, dat, attr_name,
7
                  expected_type=None, expected_name=""):
8
    failed = False
9
    if isinstance(variable, data.Variable):
10
        datvar = getattr(dat, "variable", None)
11
        if datvar is not None and datvar is not variable:
12
            raise ValueError("variable does not match the variable"
13
                             "in the data")
14
    elif hasattr(dat, "domain"):
15
        variable = dat.domain[variable]
16
    elif hasattr(dat, attr_name):
17
        variable = dat.variable
18
    else:
19
        failed = True
20
    if failed or (expected_type is not None and
21
                  not isinstance(variable, expected_type)):
22
        if not expected_type or isinstance(variable, data.Variable):
23
            raise ValueError(
24
                "expected %s variable not %s" % (expected_name, variable))
25
        else:
26
            raise ValueError("expected %s, not '%s'" %
27
                             (expected_type.__name__, type(variable).__name__))
28
    return variable
29
30
31
def create_discrete(cls, *args):
32
    return cls(*args)
33
34
35
class Discrete(np.ndarray):
36
    def __new__(cls, dat=None, col_variable=None, row_variable=None, unknowns=None, unknown_rows=None):
37
        if isinstance(dat, data.Storage):
38
            if unknowns is not None:
39
                raise TypeError(
40
                    "incompatible arguments (data storage and 'unknowns'")
41
            return cls.from_data(dat, col_variable, row_variable)
42
43
        if row_variable is not None:
44
            row_variable = _get_variable(row_variable, dat, "row_variable")
45
            rows = len(row_variable.values)
46
        else:
47
            rows = dat.shape[0]
48
        if col_variable is not None:
49
            col_variable = _get_variable(col_variable, dat, "col_variable")
50
            cols = len(col_variable.values)
51
        else:
52
            cols = dat.shape[1]
53
54
        self = super().__new__(cls, (rows, cols))
55
        self.row_variable = row_variable
56
        self.col_variable = col_variable
57
        if dat is None:
58
            self[:] = 0
59
            self.unknowns = unknowns or 0
60
            self.unknown_rows = unknown_rows or 0
61
        else:
62
            self[...] = dat
63
            self.unknowns = (unknowns if unknowns is not None
64
                             else getattr(dat, "unknowns", 0))
65
            self.unknown_rows = unknown_rows if unknown_rows is not None else 0
66
        return self
67
68
69
    @classmethod
70
    def from_data(cls, data, col_variable, row_variable=None):
0 ignored issues
show
Comprehensibility Bug introduced by
data is re-defining a name which is already available in the outer scope (previously defined on line 3).

It is generally bad practice to shadow variables from the outer scope. In most cases, this is done unintentionally and might lead to unexpected behavior:

param = 5

class Foo:
    def __init__(self, param):   # "param" would be flagged here
        self.param = param
Loading history...
71
        if row_variable is None:
72
            row_variable = data.domain.class_var
73
            if row_variable is None:
74
                raise ValueError("row_variable needs to be specified (data "
75
                                 "has no class)")
76
        row_variable = _get_variable(row_variable, data, "row_variable")
77
        col_variable = _get_variable(col_variable, data, "col_variable")
78
        try:
79
            conts, unknown_rows = data._compute_contingency(
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _compute_contingency was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
80
                            [col_variable], row_variable)
81
            dist, unknowns = conts[0]
82
83
            self = super().__new__(cls, dist.shape)
84
            self[...] = dist
85
            self.unknowns = unknowns
86
            self.unknown_rows = unknown_rows
87
        except NotImplementedError:
88
            shape = len(row_variable.values), len(col_variable.values)
89
            self = super().__new__(cls, shape)
90
            self[...] = np.zeros(shape)
91
            self.unknowns = 0
92
            self.unknown_rows = 0
93
            rind = data.domain.index(row_variable)
94
            cind = data.domain.index(col_variable)
95
            for row in data:
96
                rval, cval = row[rind], row[cind]
97
                w = row.weight
98
                if math.isnan(rval):
99
                    self.unknown_rows += w
100
                    continue
101
                if math.isnan(cval):
102
                    self.unknowns[cval] += w
103
                else:
104
                    self[rval, cval] += w
105
        self.row_variable = row_variable
106
        self.col_variable = col_variable
107
        return self
108
109
110
    def __eq__(self, other):
111
        return np.array_equal(self, other) and (
112
            not hasattr(other, "unknowns") or
113
            np.array_equal(self.unknowns, other.unknowns))
114
115
116
    def __getitem__(self, index):
117
        if isinstance(index, str):
118
            if len(self.shape) == 2:  # contingency
119
                index = self.row_variable.to_val(index)
120
                contingency_row = super().__getitem__(index)
121
                contingency_row.col_variable = self.col_variable
122
                return contingency_row
123
            else:  # Contingency row
124
                column = self.strides == self.base.strides[:1]
125
                if column:
126
                    index = self.row_variable.to_val(index)
127
                else:
128
                    index = self.col_variable.to_val(index)
129
130
        elif isinstance(index, tuple):
131
            if isinstance(index[0], str):
132
                index = (self.row_variable.to_val(index[0]), index[1])
133
            if isinstance(index[1], str):
134
                index = (index[0], self.col_variable.to_val(index[1]))
135
        result = super().__getitem__(index)
136
        if result.strides:
137
            result.col_variable = self.col_variable
138
            result.row_variable = self.row_variable
139
        return result
140
141
    def __setitem__(self, index, value):
142
        if isinstance(index, str):
143
            index = self.row_variable.to_val(index)
144
        elif isinstance(index, tuple):
145
            if isinstance(index[0], str):
146
                index = (self.row_variable.to_val(index[0]), index[1])
147
            if isinstance(index[1], str):
148
                index = (index[0], self.col_variable.to_val(index[1]))
149
        super().__setitem__(index, value)
150
151
152
    def normalize(self, axis=None):
153
        t = np.sum(self, axis=axis)
154
        if t > 1e-6:
155
            self[:] /= t
156
            if axis is None or axis == 1:
157
                self.unknowns /= t
158
159
    def __reduce__(self):
160
        return create_discrete, (Discrete, np.copy(self), self.col_variable, self.row_variable, self.unknowns)
161
162
163
class Continuous:
0 ignored issues
show
This abstract class does not seem to be used anywhere.
Loading history...
164
    def __init__(self, dat=None, col_variable=None, row_variable=None,
0 ignored issues
show
You should not explicitly **return** a value in an __init__ method.

The __init__ method only initializes the current object instance, which the constructor call itself then returns. A return value other than None is not supported and raises a TypeError at runtime.

Loading history...
165
                 unknowns=None, unknown_rows=None):
166
        if isinstance(dat, data.Storage):
167
            if unknowns is not None:
168
                raise TypeError(
169
                    "incompatible arguments (data storage and 'unknowns'")
170
            return self.from_data(dat, col_variable, row_variable)
171
172
        if row_variable is not None:
173
            row_variable = _get_variable(row_variable, dat, "row_variable")
174
        if col_variable is not None:
175
            col_variable = _get_variable(col_variable, dat, "col_variable")
176
177
        self.values, self.counts = dat
0 ignored issues
show
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 164.
Loading history...
178
179
        self.row_variable = row_variable
180
        self.col_variable = col_variable
181
        if unknowns is not None:
182
            self.unknowns = unknowns
183
        elif row_variable:
184
            self.unknowns = np.zeros(len(row_variable.values))
185
        else:
186
            self.unknowns = None
187
        if unknown_rows is not None:
188
            self.unknown_rows = unknown_rows
189
        elif row_variable:
190
            self.unknown_rows = 0
191
        else:
192
            self.unknown_rows = None
193
194
195
    def from_data(self, data, col_variable, row_variable=None):
0 ignored issues
show
Comprehensibility Bug introduced by
data is re-defining a name which is already available in the outer scope (previously defined on line 3).

It is generally bad practice to shadow variables from the outer scope. In most cases, this is done unintentionally and might lead to unexpected behavior:

param = 5

class Foo:
    def __init__(self, param):   # "param" would be flagged here
        self.param = param
Loading history...
196
        if row_variable is None:
197
            row_variable = data.domain.class_var
198
            if row_variable is None:
199
                raise ValueError("row_variable needs to be specified (data"
200
                                 "has no class)")
201
        self.row_variable = _get_variable(row_variable, data, "row_variable")
202
        self.col_variable = _get_variable(col_variable, data, "col_variable")
203
        try:
204
            conts, self.unknown_rows = data._compute_contingency(
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _compute_contingency was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
205
                [col_variable], row_variable)
206
            (self.values, self.counts), self.unknowns = conts[0]
207
        except NotImplementedError:
208
            raise NotImplementedError("Fallback method for computation of "
209
                                      "contingencies is not implemented yet")
210
211
212
    def __eq__(self, other):
213
        return (np.array_equal(self.values, other.values) and
214
                np.array_equal(self.counts, other.counts) and
215
                (not hasattr(other, "unknowns") or
216
                 np.array_equal(self.unknowns, other.unknowns)))
217
218
219
    def __getitem__(self, index):
220
        """ Return contingencies for a given class value. """
221
        if isinstance(index, (str, float)):
222
            index = self.row_variable.to_val(index)
223
        C = self.counts[index]
224
        ind = C > 0
225
        return np.vstack((self.values[ind], C[ind]))
226
227
228
    def __len__(self):
229
        return self.counts.shape[0]
230
231
232
    def __setitem__(self, index, value):
233
        raise NotImplementedError("Setting individual class contingencies is "
234
                                  "not implemented yet. Set .values and .counts.")
235
236
237
    def normalize(self, axis=None):
238
        if axis is None:
239
            t = sum(np.sum(x[:, 1]) for x in self)
240
            if t > 1e-6:
241
                for x in self:
242
                    x[:, 1] /= t
243
        elif axis != 1:
244
            raise ValueError("contingencies can be normalized only with axis=1"
245
                             " or without axis")
246
        else:
247
            for i, x in enumerate(self):
248
                t = np.sum(x[:, 1])
249
                if t > 1e-6:
250
                    x[:, 1] /= t
251
                    self.unknowns[i] /= t
252
                else:
253
                    if self.unknowns[i] > 1e-6:
254
                        self.unknowns[i] = 1
255
256
257
def get_contingency(dat, col_variable, row_variable=None, unknowns=None, unknown_rows=None):
258
    variable = _get_variable(col_variable, dat, "col_variable")
259
    if variable.is_discrete:
260
        return Discrete(dat, col_variable, row_variable, unknowns, unknown_rows)
261
    elif variable.is_continuous:
262
        return Continuous(dat, col_variable, row_variable, unknowns, unknown_rows)
263
    else:
264
        raise TypeError("cannot compute distribution of '%s'" %
265
                        type(variable).__name__)
266
267
268
def get_contingencies(dat, skipDiscrete=False, skipContinuous=False):
269
    vars = dat.domain.attributes
0 ignored issues
show
Bug Best Practice introduced by
This seems to re-define the built-in vars.

It is generally discouraged to redefine built-ins as this makes code very hard to read.

Loading history...
270
    row_var = dat.domain.class_var
271
    if row_var is None:
272
        raise ValueError("data has no target variable")
273
    if skipDiscrete:
274
        if skipContinuous:
275
            return []
276
        columns = [i for i, var in enumerate(vars) if var.is_continuous]
277
    elif skipContinuous:
278
        columns = [i for i, var in enumerate(vars) if var.is_discrete]
279
    else:
280
        columns = None
281
    try:
282
        dist_unks, unknown_rows = dat._compute_contingency(columns)
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _compute_contingency was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
283
        if columns is None:
284
            columns = np.arange(len(vars))
285
        contigs = []
286
        for col, (cont, unks) in zip(columns, dist_unks):
287
            contigs.append(get_contingency(cont, vars[col], row_var, unks, unknown_rows))
288
    except NotImplementedError:
289
        if columns is None:
290
            columns = range(len(vars))
291
        contigs = [get_contingency(dat, i) for i in columns]
292
    return contigs
293