1 | import random |
||
2 | import zlib |
||
3 | import math |
||
4 | from numbers import Real |
||
5 | import numpy as np |
||
0 ignored issues
–
show
|
|||
6 | from Orange import data |
||
7 | |||
8 | |||
def _get_variable(dat, variable, expected_type=None, expected_name=""):
    """Resolve `variable` to a variable descriptor, using `dat` as context.

    Resolution order:

    * if `variable` is already a `data.Variable`, it is returned unchanged
      (after checking that it is the very same object as `dat.variable`,
      when `dat` has such an attribute that is not None);
    * otherwise, if `dat` has a `domain`, the result is
      `dat.domain[variable]`;
    * otherwise, if `dat` has a `variable` attribute, that attribute is used;
    * otherwise the resolution fails.

    :param dat: object providing the context (anything with a `domain`
        and/or `variable` attribute, or neither)
    :param variable: a `data.Variable` or a key accepted by `dat.domain[...]`
    :param expected_type: optional type the resolved variable must be an
        instance of
    :param str expected_name: name of the expected kind of variable, used
        only in the error message
    :return: the resolved variable
    :raises ValueError: if the variable conflicts with `dat.variable`,
        cannot be resolved, or is not an instance of `expected_type`
    """
    failed = False
    if isinstance(variable, data.Variable):
        datvar = getattr(dat, "variable", None)
        if datvar is not None and datvar is not variable:
            raise ValueError(
                "variable does not match the variable in the data")
    elif hasattr(dat, "domain"):
        variable = dat.domain[variable]
    elif hasattr(dat, "variable"):
        variable = dat.variable
    else:
        failed = True

    type_ok = expected_type is None or isinstance(variable, expected_type)
    if failed or not type_ok:
        if isinstance(variable, data.Variable):
            raise ValueError(
                "expected %s variable not %s" % (expected_name, variable))
        # `expected_type` may be None when resolution itself failed; fall
        # back to the generic variable type so that the caller still gets
        # the documented ValueError rather than an AttributeError.
        wanted = (expected_type or data.Variable).__name__
        raise ValueError("expected %s, not '%s'" %
                         (wanted, type(variable).__name__))
    return variable
||
31 | |||
32 | |||
class Discrete(np.ndarray):
    """Distribution of a discrete variable, stored as a 1-d array.

    Element ``i`` holds the (possibly weighted) count of the ``i``-th value.
    Two extra attributes travel with the array:

    * ``variable`` -- the variable descriptor, or None;
    * ``unknowns`` -- the (possibly weighted) count of missing values.

    Items may also be indexed by a string, which is translated to an index
    with ``self.variable.to_val``.
    """

    def __new__(cls, dat, variable=None, unknowns=None):
        """Create a distribution.

        :param dat: a `data.Storage` (the distribution is then computed by
            :meth:`from_data`), None (all counts are zero; `variable` must
            be given to determine the length), or a sequence of counts
        :param variable: variable descriptor or a key resolvable by
            `_get_variable`; when given, it determines the array length
        :param unknowns: count of unknown values; defaults to
            ``dat.unknowns`` if present, else 0
        :raises TypeError: if `unknowns` is given together with a storage
        """
        if isinstance(dat, data.Storage):
            if unknowns is not None:
                raise TypeError(
                    "incompatible arguments (data storage and 'unknowns'")
            return cls.from_data(dat, variable)

        if variable is not None:
            variable = _get_variable(dat, variable)
            n = len(variable.values)
        else:
            n = len(dat)

        self = super().__new__(cls, n)
        self.variable = variable
        if dat is None:
            self[:] = 0
            self.unknowns = unknowns or 0
        else:
            self[:] = dat
            self.unknowns = (unknowns if unknowns is not None
                             else getattr(dat, "unknowns", 0))
        return self

    def __array_finalize__(self, obj):
        # Called by numpy for every new instance, including views, copies
        # and results of arithmetic.  Carry the extra attributes over from
        # the source array so that such derived arrays stay usable.
        self.variable = getattr(obj, "variable", None)
        self.unknowns = getattr(obj, "unknowns", 0)

    @classmethod
    def from_data(cls, data, variable):
        """Compute the distribution of `variable` in the storage `data`.

        Uses ``data._compute_distributions`` when the storage implements
        it; if that raises NotImplementedError, the values are counted
        one by one (weighted by ``data.W`` if ``data.has_weights()``).
        """
        variable = _get_variable(data, variable)
        try:
            dist, unknowns = data._compute_distributions([variable])[0]
            self = super().__new__(cls, len(dist))
            self[:] = dist
            self.unknowns = unknowns
        except NotImplementedError:
            self = super().__new__(cls, len(variable.values))
            self[:] = np.zeros(len(variable.values))
            self.unknowns = 0
            if data.has_weights():
                for val, w in zip(data[:, variable], data.W):
                    if not math.isnan(val):
                        # numpy accepts only integer indices
                        self[int(val)] += w
                    else:
                        self.unknowns += w
            else:
                for inst in data:
                    val = inst[variable]
                    if val == val:  # False only for NaN, i.e. unknown
                        self[int(val)] += 1
                    else:
                        self.unknowns += 1
        self.variable = variable
        return self

    def __eq__(self, other):
        """Return a single bool: same counts and, if `other` has
        `unknowns`, the same number of unknowns."""
        return np.array_equal(self, other) and (
            not hasattr(other, "unknowns") or self.unknowns == other.unknowns)

    def __ne__(self, other):
        return not self == other

    def __getitem__(self, index):
        if isinstance(index, str):
            index = self.variable.to_val(index)
        return super().__getitem__(index)

    def __setitem__(self, index, value):
        if isinstance(index, str):
            index = self.variable.to_val(index)
        super().__setitem__(index, value)

    def __hash__(self):
        return zlib.adler32(self) ^ hash(self.unknowns)

    def __add__(self, other):
        s = super().__add__(other)
        s.unknowns = self.unknowns + getattr(other, "unknowns", 0)
        return s

    def __iadd__(self, other):
        super().__iadd__(other)
        self.unknowns += getattr(other, "unknowns", 0)
        return self

    def __sub__(self, other):
        s = super().__sub__(other)
        s.unknowns = self.unknowns - getattr(other, "unknowns", 0)
        return s

    def __isub__(self, other):
        super().__isub__(other)
        self.unknowns -= getattr(other, "unknowns", 0)
        return self

    def __mul__(self, other):
        """Multiply the counts; a real factor scales `unknowns` as well."""
        s = super().__mul__(other)
        if isinstance(other, Real):
            s.unknowns = self.unknowns * other
        return s

    def __imul__(self, other):
        super().__imul__(other)
        if isinstance(other, Real):
            self.unknowns *= other
        return self

    def __truediv__(self, other):
        """Divide the counts; a real divisor scales `unknowns` as well."""
        s = super().__truediv__(other)
        if isinstance(other, Real):
            s.unknowns = self.unknowns / other
        return s

    def __itruediv__(self, other):
        super().__itruediv__(other)
        if isinstance(other, Real):
            self.unknowns /= other
        return self

    # Names of the division hooks used before true division; kept so that
    # explicit calls to them keep working.
    __div__ = __truediv__
    __idiv__ = __itruediv__

    def normalize(self):
        """Scale the counts (and `unknowns`) in place so the counts sum to 1.

        If the sum is not greater than 1e-6, a non-empty distribution is
        set to uniform instead and `unknowns` is left untouched.
        """
        total = float(np.asarray(self).sum())
        if total > 1e-6:
            # Divide the data directly; `unknowns` is scaled separately.
            super().__itruediv__(total)
            self.unknowns /= total
        elif self.shape[0]:
            self[:] = 1 / self.shape[0]

    def modus(self):
        """Return the most frequent value (as `data.Value` if the variable
        is known, otherwise as an index)."""
        val = np.argmax(self)
        return data.Value(self.variable,
                          val) if self.variable is not None else val

    def random(self):
        """Return a random value drawn proportionally to the counts (as
        `data.Value` if the variable is known, otherwise as an index)."""
        v = random.random() * np.asarray(self).sum()
        s = i = 0
        for i, e in enumerate(self):
            s += e
            if s > v:
                break
        return data.Value(self.variable, i) if self.variable is not None else i
||
186 | |||
187 | |||
class Continuous(np.ndarray):
    """Distribution of a continuous variable, stored as a 2-d array.

    Row 0 holds the values and row 1 the corresponding weights (counts);
    :meth:`min` and :meth:`max` read the first and the last column, so the
    columns are expected to be ordered by value.  Two extra attributes
    travel with the array:

    * ``variable`` -- the variable descriptor, or None;
    * ``unknowns`` -- the count of missing values.
    """

    def __new__(cls, dat, variable=None, unknowns=None):
        """Create a distribution.

        :param dat: a `data.Storage` (the distribution is then computed by
            :meth:`from_data`), an int (a zero-filled distribution of shape
            ``(2, dat)``), or array-like data that is copied as is
        :param variable: variable descriptor stored on the instance
        :param unknowns: count of unknown values; defaults to
            ``dat.unknowns`` if present, else 0
        :raises TypeError: if `unknowns` is given together with a storage
        """
        if isinstance(dat, data.Storage):
            if unknowns is not None:
                raise TypeError(
                    "incompatible arguments (data storage and 'unknowns'")
            return cls.from_data(variable, dat)
        if isinstance(dat, int):
            self = super().__new__(cls, (2, dat))
            self[:] = 0
            self.unknowns = unknowns or 0
        else:
            if not isinstance(dat, np.ndarray):
                dat = np.asarray(dat)
            self = super().__new__(cls, dat.shape)
            self[:] = dat
            self.unknowns = (unknowns if unknowns is not None
                             else getattr(dat, "unknowns", 0))
        self.variable = variable
        return self

    def __array_finalize__(self, obj):
        # Called by numpy for every new instance, including views and
        # copies; carry the extra attributes over from the source array.
        self.variable = getattr(obj, "variable", None)
        self.unknowns = getattr(obj, "unknowns", 0)

    @classmethod
    def from_data(cls, variable, data):
        """Compute the distribution of `variable` in the storage `data`.

        Note the argument order (variable first), which differs from
        `Discrete.from_data`.  Uses ``data._compute_distributions`` when the
        storage implements it, and a fallback otherwise.
        """
        variable = _get_variable(data, variable)
        try:
            dist, unknowns = data._compute_distributions([variable])[0]
        except NotImplementedError:
            # NOTE(review): this fallback looks unfinished and is left
            # unchanged until it can be exercised against a real storage:
            #  * `_orange` is not imported anywhere in the visible file, so
            #    the `valuecount` call would raise NameError;
            #  * `dist.sort(axis=0)` sorts within each (value, weight)
            #    column rather than ordering the columns by value;
            #  * the dtype condition below compares `col.dtype` with
            #    itself and can never be true;
            #  * `len(col) - dist.shape[1]` also counts merged duplicates
            #    as unknowns -- confirm the intended meaning.
            col = data[:, variable]
            dtype = col.dtype
            if data.has_weights():
                if not "float" in dtype.name and "float" in col.dtype.name:
                    dtype = col.dtype.name
                dist = np.empty((2, len(col)), dtype=dtype)
                dist[0, :] = col
                dist[1, :] = data.W
            else:
                dist = np.ones((2, len(col)), dtype=dtype)
                dist[0, :] = col
            dist.sort(axis=0)
            dist = np.array(_orange.valuecount(dist))
            unknowns = len(col) - dist.shape[1]

        self = super().__new__(cls, dist.shape)
        self[:] = dist
        self.unknowns = unknowns
        self.variable = variable
        return self

    def __eq__(self, other):
        """Return a single bool: same data and, if `other` has `unknowns`,
        the same number of unknowns."""
        return np.array_equal(self, other) and (
            not hasattr(other, "unknowns") or self.unknowns == other.unknowns)

    def __ne__(self, other):
        # Keep `!=` the exact negation of the overridden `==`; without this
        # numpy's element-wise `!=` would be used.
        return not self == other

    def __hash__(self):
        return zlib.adler32(self) ^ hash(self.unknowns)

    def normalize(self):
        """Scale the weights (and `unknowns`) in place so the weights sum
        to 1.

        If the sum is not greater than 1e-6, the weights of a non-empty
        distribution are set to uniform and `unknowns` is left untouched.
        """
        total = float(np.asarray(self)[1].sum())
        if total > 1e-6:
            self[1, :] /= total
            self.unknowns /= total
        elif self.shape[1]:
            self[1, :] = 1 / self.shape[1]

    def modus(self):
        """Return the value with the largest weight."""
        val = np.argmax(self[1, :])
        return self[0, val]

    # TODO implement __getitem__ that will return a normal array, not Continuous
    def min(self):
        """Return the value in the first column."""
        return self[0, 0]

    def max(self):
        """Return the value in the last column."""
        return self[0, -1]

    def random(self):
        """Return a random value drawn proportionally to the weights.

        Returns None for an empty distribution.  If rounding (or all-zero
        weights) keeps the running sum from exceeding the threshold, the
        last value is returned, mirroring `Discrete.random`.
        """
        values, weights = np.asarray(self)
        if not len(values):
            return None
        v = random.random() * weights.sum()
        s = 0
        for x, prob in zip(values, weights):
            s += prob
            if s > v:
                return x
        return values[-1]

    def mean(self):
        """Return the weighted mean of the values."""
        values, weights = np.asarray(self)
        return np.average(values, weights=weights)

    def variance(self):
        """Return the weighted (population) variance of the values."""
        values, weights = np.asarray(self)
        avg = np.average(values, weights=weights)
        return np.average((values - avg) ** 2, weights=weights)

    def standard_deviation(self):
        """Return the square root of :meth:`variance`."""
        return math.sqrt(self.variance())
||
279 | |||
280 | |||
281 | |||
def class_distribution(data):
    """Return the distribution(s) of the class variable(s) of `data`.

    :param data: object with a `domain` that has `class_var` and
        `class_vars` attributes
    :return: a single distribution if the domain has a `class_var`;
        otherwise a list with one distribution per variable in
        `class_vars`
    :raises ValueError: if the domain has neither
    """
    if data.domain.class_var:
        return get_distribution(data, data.domain.class_var)
    elif data.domain.class_vars:
        # `get_distribution` takes the data first and the variable second.
        return [get_distribution(data, cls) for cls in data.domain.class_vars]
    else:
        raise ValueError("domain has no class attribute")
||
289 | |||
290 | |||
def get_distribution(dat, variable, unknowns=None):
    """Build the distribution object that matches the kind of `variable`.

    :param dat: data passed on to the distribution constructor
    :param variable: variable descriptor, or a key that `_get_variable`
        can resolve against `dat`
    :param unknowns: count of unknown values, passed on unchanged
    :return: a `Discrete` instance for a discrete variable, a `Continuous`
        instance for a continuous one
    :raises TypeError: if the variable is neither discrete nor continuous
    """
    resolved = _get_variable(dat, variable)
    if resolved.is_discrete:
        factory = Discrete
    elif resolved.is_continuous:
        factory = Continuous
    else:
        raise TypeError("cannot compute distribution of '%s'" %
                        type(resolved).__name__)
    return factory(dat, resolved, unknowns)
||
300 | |||
301 | |||
def get_distributions(dat, skipDiscrete=False, skipContinuous=False):
    """Return distributions of the variables in `dat.domain.variables`.

    :param dat: object with a `domain` and, optionally, an optimized
        `_compute_distributions` method
    :param bool skipDiscrete: leave out discrete variables
    :param bool skipContinuous: leave out continuous variables
    :return: list of distributions, one per selected variable (an empty
        list when both kinds are skipped)
    """
    variables = dat.domain.variables
    if skipDiscrete and skipContinuous:
        return []

    if skipDiscrete:
        selected = [pos for pos, var in enumerate(variables)
                    if var.is_continuous]
    elif skipContinuous:
        selected = [pos for pos, var in enumerate(variables)
                    if var.is_discrete]
    else:
        # None is handed to the storage as is; it is expanded to all
        # column indices only for the bookkeeping below.
        selected = None

    try:
        computed = dat._compute_distributions(selected)
        indices = (np.arange(len(variables)) if selected is None
                   else selected)
        result = [get_distribution(dist, variables[col], unks)
                  for col, (dist, unks) in zip(indices, computed)]
    except NotImplementedError:
        # The storage has no optimized implementation: compute the
        # distributions one by one from the data itself.
        indices = (np.arange(len(variables)) if selected is None
                   else selected)
        result = [get_distribution(dat, col) for col in indices]
    return result
||
324 | |||
325 | |||
def get_distributions_for_columns(data, columns):
    """
    Compute the distributions for columns.

    :param Orange.data.Table data:
    :param list columns:
        List of column indices into the `data.domain` (indices can be
        :class:`int` or instances of `Orange.data.Variable`)

    """
    domain = data.domain

    # Translate every entry that is not already an int into its index.
    indices = []
    for col in columns:
        indices.append(col if isinstance(col, int) else domain.index(col))

    try:
        # Fast path: let the table (storage) compute everything at once.
        computed = data._compute_distributions(indices)
    except NotImplementedError:
        # Slow(er) path: one distribution at a time.
        return [get_distribution(data, idx) for idx in indices]

    # `computed` is a list of (values, unknowns) pairs, one per column.
    return [get_distribution(dist, domain[idx], unknown)
            for idx, (dist, unknown) in zip(indices, computed)]
||
350 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.