1 | import math |
||
2 | import numpy as np |
||
0 ignored issues
–
show
|
|||
3 | from Orange import data |
||
4 | |||
5 | |||
def _get_variable(variable, dat, attr_name,
                  expected_type=None, expected_name=""):
    """Resolve `variable` to a variable descriptor and validate it.

    Resolution rules, tried in order:

    1. If `variable` is already a `data.Variable`, it is used as is, but it
       must be the same object as `dat.variable` when `dat` has a non-None
       attribute of that name.
    2. Otherwise, if `dat` has a `domain`, the variable is looked up with
       `dat.domain[variable]`.
    3. Otherwise, if `dat` has an attribute named `attr_name`, that
       attribute is used.
    4. Otherwise the resolution fails.

    :param variable: a variable descriptor, or a key understood by
        `dat.domain[...]`
    :param dat: the object (data storage, existing contingency, ...) the
        variable refers to
    :param attr_name: name of the attribute of `dat` that holds the variable
        when `dat` has no domain (e.g. "row_variable")
    :param expected_type: if given, the resolved variable must be an
        instance of this type
    :param expected_name: human-readable type name used in error messages
    :return: the resolved variable
    :raises ValueError: if the variable cannot be resolved, does not match
        `dat.variable`, or is not of the expected type
    """
    failed = False
    if isinstance(variable, data.Variable):
        datvar = getattr(dat, "variable", None)
        if datvar is not None and datvar is not variable:
            raise ValueError("variable does not match the variable "
                             "in the data")
    elif hasattr(dat, "domain"):
        variable = dat.domain[variable]
    elif hasattr(dat, attr_name):
        # Read the attribute that was actually tested for; reading
        # `dat.variable` here failed whenever attr_name != "variable".
        variable = getattr(dat, attr_name)
    else:
        failed = True
    if failed or (expected_type is not None and
                  not isinstance(variable, expected_type)):
        if not expected_type or isinstance(variable, data.Variable):
            raise ValueError(
                "expected %s variable not %s" % (expected_name, variable))
        else:
            raise ValueError("expected %s, not '%s'" %
                             (expected_type.__name__, type(variable).__name__))
    return variable
||
29 | |||
30 | |||
def create_discrete(cls, *args):
    """Instantiate `cls` with the positional arguments `args`.

    Module-level factory; `Discrete.__reduce__` returns it as the callable
    used to rebuild an instance.
    """
    instance = cls(*args)
    return instance
||
33 | |||
34 | |||
class Discrete(np.ndarray):
    """Contingency table of two discrete variables, stored as an ndarray.

    The first axis corresponds to values of `row_variable` and the second to
    values of `col_variable`. String indices are translated to positions
    with the variables' `to_val` method.

    Attributes assigned by the constructors:

    - `row_variable`, `col_variable`: descriptors of the two variables
      (either may be None when constructed from a plain array);
    - `unknowns`: weight of instances whose column value is unknown, kept
      per row value (0 when nothing is known about them);
    - `unknown_rows`: weight of instances whose row value is unknown.
    """

    def __new__(cls, dat=None, col_variable=None, row_variable=None,
                unknowns=None, unknown_rows=None):
        """Create a contingency from a data storage or from existing counts.

        :param dat: a `data.Storage` (the table is then computed by
            `from_data`), an array-like of counts, or None for a table of
            zeros whose shape is given by the two variables
        :param col_variable: column variable (descriptor or domain key)
        :param row_variable: row variable (descriptor or domain key)
        :param unknowns: weights of unknown column values; may not be given
            together with a data storage
        :param unknown_rows: weight of instances with unknown row value
        :raises TypeError: if `dat` is a data storage and `unknowns` is given
        """
        if isinstance(dat, data.Storage):
            if unknowns is not None:
                raise TypeError(
                    "incompatible arguments (data storage and 'unknowns')")
            return cls.from_data(dat, col_variable, row_variable)

        # Each dimension comes from the variable when it is given and from
        # the shape of `dat` otherwise.
        if row_variable is not None:
            row_variable = _get_variable(row_variable, dat, "row_variable")
            rows = len(row_variable.values)
        else:
            rows = dat.shape[0]
        if col_variable is not None:
            col_variable = _get_variable(col_variable, dat, "col_variable")
            cols = len(col_variable.values)
        else:
            cols = dat.shape[1]

        self = super().__new__(cls, (rows, cols))
        self.row_variable = row_variable
        self.col_variable = col_variable
        if dat is None:
            self[:] = 0
            # `x or 0` would raise for array-valued arguments (ambiguous
            # truth value), so test for None explicitly.
            self.unknowns = 0 if unknowns is None else unknowns
            self.unknown_rows = 0 if unknown_rows is None else unknown_rows
        else:
            self[...] = dat
            # Explicit arguments win; otherwise inherit from `dat` when it
            # carries the attribute (e.g. another contingency).
            self.unknowns = (unknowns if unknowns is not None
                             else getattr(dat, "unknowns", 0))
            self.unknown_rows = (unknown_rows if unknown_rows is not None
                                 else getattr(dat, "unknown_rows", 0))
        return self

    @classmethod
    def from_data(cls, data, col_variable, row_variable=None):
        """Compute the contingency of two variables from a data storage.

        :param data: data storage providing `domain` and, optionally, a fast
            `_compute_contingency`
        :param col_variable: column variable (descriptor or domain key)
        :param row_variable: row variable; defaults to
            `data.domain.class_var`
        :return: a new `Discrete` instance
        :raises ValueError: if no row variable is given and the data has no
            class variable
        """
        if row_variable is None:
            row_variable = data.domain.class_var
            if row_variable is None:
                raise ValueError("row_variable needs to be specified (data "
                                 "has no class)")
        row_variable = _get_variable(row_variable, data, "row_variable")
        col_variable = _get_variable(col_variable, data, "col_variable")
        try:
            conts, unknown_rows = data._compute_contingency(
                [col_variable], row_variable)
            dist, unknowns = conts[0]

            self = super().__new__(cls, dist.shape)
            self[...] = dist
            self.unknowns = unknowns
            self.unknown_rows = unknown_rows
        except NotImplementedError:
            # Storage has no fast implementation: count instance by instance.
            shape = len(row_variable.values), len(col_variable.values)
            self = super().__new__(cls, shape)
            self[...] = np.zeros(shape)
            # One counter of unknown column values per row value.
            self.unknowns = np.zeros(shape[0])
            self.unknown_rows = 0
            rind = data.domain.index(row_variable)
            cind = data.domain.index(col_variable)
            for row in data:
                rval, cval = row[rind], row[cind]
                w = row.weight
                if math.isnan(rval):
                    self.unknown_rows += w
                    continue
                if math.isnan(cval):
                    self.unknowns[int(rval)] += w
                else:
                    # Values are float-like; NumPy requires integer indices.
                    self[int(rval), int(cval)] += w
        self.row_variable = row_variable
        self.col_variable = col_variable
        return self

    def __eq__(self, other):
        """Return True if the counts (and `unknowns`, if `other` has them)
        are equal."""
        return np.array_equal(self, other) and (
            not hasattr(other, "unknowns") or
            np.array_equal(self.unknowns, other.unknowns))

    def __getitem__(self, index):
        """Index the table; strings are translated to value positions."""
        if isinstance(index, str):
            if len(self.shape) == 2:  # contingency
                index = self.row_variable.to_val(index)
                contingency_row = super().__getitem__(index)
                contingency_row.col_variable = self.col_variable
                return contingency_row
            else:  # Contingency row
                # Presumably detects whether this 1-d view runs along the
                # first axis of its base (i.e. is a column) by comparing
                # strides -- TODO confirm.
                column = self.strides == self.base.strides[:1]
                if column:
                    index = self.row_variable.to_val(index)
                else:
                    index = self.col_variable.to_val(index)

        elif isinstance(index, tuple):
            if isinstance(index[0], str):
                index = (self.row_variable.to_val(index[0]), index[1])
            if isinstance(index[1], str):
                index = (index[0], self.col_variable.to_val(index[1]))
        result = super().__getitem__(index)
        # Scalars have empty strides; only array results get the variables.
        if result.strides:
            result.col_variable = self.col_variable
            result.row_variable = self.row_variable
        return result

    def __setitem__(self, index, value):
        """Assign to the table; strings are translated to value positions."""
        if isinstance(index, str):
            index = self.row_variable.to_val(index)
        elif isinstance(index, tuple):
            if isinstance(index[0], str):
                index = (self.row_variable.to_val(index[0]), index[1])
            if isinstance(index[1], str):
                index = (index[0], self.col_variable.to_val(index[1]))
        super().__setitem__(index, value)

    def normalize(self, axis=None):
        """Normalize the counts in place so that they sum to 1.

        :param axis: None to divide the whole table by its total; otherwise
            the axis along which sums are computed, so that every slice
            along that axis sums to 1 (axis=1 normalizes each row)

        Sums not exceeding 1e-6 are treated as zero and the corresponding
        entries are left unchanged. `unknowns` is divided by the same sums
        when `axis` is None or 1.
        """
        # Plain ndarray view of the same memory: avoids the attribute
        # bookkeeping of Discrete.__getitem__ and yields plain sums.
        counts = np.asarray(self)
        if axis is None:
            total = counts.sum()
            if total > 1e-6:
                counts /= total
                self.unknowns = self.unknowns / total
            return
        # keepdims makes the sums broadcast against the table for any axis.
        totals = counts.sum(axis=axis, keepdims=True)
        divisors = np.where(totals > 1e-6, totals, 1.0)
        counts /= divisors
        if axis == 1:
            self.unknowns = self.unknowns / divisors.ravel()

    def __reduce__(self):
        """Pickle as a call to `create_discrete` with the table's state."""
        return create_discrete, (Discrete, np.copy(self),
                                 self.col_variable, self.row_variable,
                                 self.unknowns,
                                 getattr(self, "unknown_rows", None))
||
161 | |||
162 | |||
class Continuous:
    """Contingency of a continuous column variable against a row variable.

    Attributes:

    - `values`: the distinct values of the column variable;
    - `counts`: a 2-d array whose row `i` holds, for the i-th row value,
      the count of each entry of `values`
      (assumed to be a dense NumPy array -- TODO confirm for all storages);
    - `unknowns`: per row value, weight of instances with unknown column
      value (None when neither given nor derivable);
    - `unknown_rows`: weight of instances with unknown row value;
    - `row_variable`, `col_variable`: descriptors of the two variables.
    """

    def __init__(self, dat=None, col_variable=None, row_variable=None,
                 unknowns=None, unknown_rows=None):
        """Create a contingency from a data storage or from existing counts.

        :param dat: a `data.Storage` (the contingency is then computed by
            `from_data`) or a pair `(values, counts)`
        :param col_variable: column variable (descriptor or domain key)
        :param row_variable: row variable (descriptor or domain key)
        :param unknowns: weights of unknown column values; may not be given
            together with a data storage
        :param unknown_rows: weight of instances with unknown row value
        :raises TypeError: if `dat` is a data storage and `unknowns` is given
        """
        if isinstance(dat, data.Storage):
            if unknowns is not None:
                raise TypeError(
                    "incompatible arguments (data storage and 'unknowns')")
            # from_data fills in all attributes; __init__ must return None.
            self.from_data(dat, col_variable, row_variable)
            return

        if row_variable is not None:
            row_variable = _get_variable(row_variable, dat, "row_variable")
        if col_variable is not None:
            col_variable = _get_variable(col_variable, dat, "col_variable")

        self.values, self.counts = dat

        self.row_variable = row_variable
        self.col_variable = col_variable
        if unknowns is not None:
            self.unknowns = unknowns
        elif row_variable:
            self.unknowns = np.zeros(len(row_variable.values))
        else:
            self.unknowns = None
        if unknown_rows is not None:
            self.unknown_rows = unknown_rows
        elif row_variable:
            self.unknown_rows = 0
        else:
            self.unknown_rows = None

    def from_data(self, data, col_variable, row_variable=None):
        """Fill this instance with the contingency computed from `data`.

        :param data: data storage providing `domain` and
            `_compute_contingency`
        :param col_variable: column variable (descriptor or domain key)
        :param row_variable: row variable; defaults to
            `data.domain.class_var`
        :raises ValueError: if no row variable is given and the data has no
            class variable
        :raises NotImplementedError: if the storage cannot compute
            contingencies (there is no fallback)
        """
        if row_variable is None:
            row_variable = data.domain.class_var
            if row_variable is None:
                raise ValueError("row_variable needs to be specified (data "
                                 "has no class)")
        self.row_variable = _get_variable(row_variable, data, "row_variable")
        self.col_variable = _get_variable(col_variable, data, "col_variable")
        try:
            # Pass the resolved descriptors, as Discrete.from_data does.
            conts, self.unknown_rows = data._compute_contingency(
                [self.col_variable], self.row_variable)
            (self.values, self.counts), self.unknowns = conts[0]
        except NotImplementedError as err:
            raise NotImplementedError(
                "Fallback method for computation of "
                "contingencies is not implemented yet") from err

    def __eq__(self, other):
        """Compare values, counts and (if `other` has them) unknowns."""
        if not (hasattr(other, "values") and hasattr(other, "counts")):
            # Not a contingency-like object; let Python fall back.
            return NotImplemented
        return (np.array_equal(self.values, other.values) and
                np.array_equal(self.counts, other.counts) and
                (not hasattr(other, "unknowns") or
                 np.array_equal(self.unknowns, other.unknowns)))

    def __getitem__(self, index):
        """ Return contingencies for a given class value.

        The result is a new 2-row array: the first row holds the values
        with non-zero count for the given row value, the second row the
        corresponding counts.
        """
        if isinstance(index, (str, float)):
            index = self.row_variable.to_val(index)
        C = self.counts[index]
        ind = C > 0
        return np.vstack((self.values[ind], C[ind]))

    def __len__(self):
        """Return the number of rows of `counts`."""
        return self.counts.shape[0]

    def __setitem__(self, index, value):
        raise NotImplementedError("Setting individual class contingencies is "
                                  "not implemented yet. Set .values and .counts.")

    def normalize(self, axis=None):
        """Normalize `counts` so that they sum to 1.

        :param axis: None to divide all counts by their total, or 1 to
            normalize the counts of each row value separately
        :raises ValueError: for any other `axis`

        With axis=1, `unknowns[i]` is divided by the same sum as row `i`;
        if the row sum does not exceed 1e-6, the row is left unchanged and
        `unknowns[i]` is set to 1 when it is itself above 1e-6.
        """
        if axis is not None and axis != 1:
            raise ValueError("contingencies can be normalized only with axis=1"
                             " or without axis")
        # Work on self.counts directly: __getitem__ returns copies, so
        # modifying its results would leave the contingency untouched.
        counts = np.asarray(self.counts, dtype=float)
        if axis is None:
            total = counts.sum()
            if total > 1e-6:
                counts /= total
        else:
            for i in range(counts.shape[0]):
                total = counts[i].sum()
                if total > 1e-6:
                    counts[i] /= total
                    self.unknowns[i] /= total
                elif self.unknowns[i] > 1e-6:
                    self.unknowns[i] = 1
        self.counts = counts
||
255 | |||
256 | |||
def get_contingency(dat, col_variable, row_variable=None, unknowns=None, unknown_rows=None):
    """Build the contingency table matching the type of `col_variable`.

    The column variable is resolved against `dat`; a discrete variable
    yields a `Discrete` table and a continuous one a `Continuous` table.
    All arguments are forwarded unchanged to the chosen class.

    :raises TypeError: if the variable is neither discrete nor continuous
    """
    resolved = _get_variable(col_variable, dat, "col_variable")
    if resolved.is_discrete:
        table_type = Discrete
    elif resolved.is_continuous:
        table_type = Continuous
    else:
        raise TypeError("cannot compute distribution of '%s'" %
                        type(resolved).__name__)
    return table_type(dat, col_variable, row_variable, unknowns, unknown_rows)
||
266 | |||
267 | |||
def get_contingencies(dat, skipDiscrete=False, skipContinuous=False):
    """Return contingencies of the attributes of `dat` against its class.

    :param dat: data storage with a `domain`
    :param skipDiscrete: leave out discrete attributes
    :param skipContinuous: leave out continuous attributes
    :return: a list of contingencies, one per selected attribute
    :raises ValueError: if the data has no class variable

    The storage's `_compute_contingency` is tried first; when it raises
    NotImplementedError, each contingency is computed separately through
    `get_contingency`.
    """
    attributes = dat.domain.attributes
    class_var = dat.domain.class_var
    if class_var is None:
        raise ValueError("data has no target variable")

    if skipDiscrete and skipContinuous:
        return []
    if skipDiscrete:
        columns = [idx for idx, attr in enumerate(attributes)
                   if attr.is_continuous]
    elif skipContinuous:
        columns = [idx for idx, attr in enumerate(attributes)
                   if attr.is_discrete]
    else:
        # None asks the storage for all columns.
        columns = None

    try:
        computed, unknown_rows = dat._compute_contingency(columns)
        if columns is None:
            columns = np.arange(len(attributes))
        contigs = [
            get_contingency(cont, attributes[col], class_var, unks,
                            unknown_rows)
            for col, (cont, unks) in zip(columns, computed)
        ]
    except NotImplementedError:
        if columns is None:
            columns = range(len(attributes))
        contigs = [get_contingency(dat, idx) for idx in columns]
    return contigs
||
293 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder.