try:
    # Abstract base classes live in collections.abc; the alias in
    # `collections` was removed in Python 3.10.
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable
from itertools import chain
from math import log
from numbers import Integral
import weakref

import numpy as np

from .variable import *
||
0 ignored issues
–
show
The import
numpy could not be resolved.
This can be caused by one of the following: 1. Missing dependencies: this error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
# .scrutinizer.yml
before_commands:
- sudo pip install abc # Python2
- sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint; when installing your modules, make sure to use
the command for the correct version.
2. Missing __init__.py files: this error could also result from missing ![]() |
|||
9 | |||
10 | |||
class DomainConversion:
    """
    Lookup tables for converting data from one domain into another.

    Each table has one entry per variable of the destination domain. The
    entry is the value returned by ``source.index(var)`` when the source
    domain contains the variable; otherwise it is the variable's
    ``compute_value`` attribute.

    .. attribute:: source

        The source domain. The destination is not stored, since the
        destination domain is the one that holds this instance.

    .. attribute:: attributes

        Entries for the destination's attributes.

    .. attribute:: class_vars

        Entries for the destination's class variables.

    .. attribute:: variables

        Entries for attributes followed by class variables
        (:obj:`attributes` + :obj:`class_vars`).

    .. attribute:: metas

        Entries for the destination's meta attributes.
    """

    def __init__(self, source, destination):
        """
        Build the conversion tables from `source` to `destination`.

        :param source: domain the data is converted from; must support
            ``in`` and ``index``
        :param destination: domain the data is converted to; its
            `attributes`, `class_vars` and `metas` are read
        """
        def resolve(descriptors):
            # One entry per descriptor: its position in the source, or its
            # compute_value when the source does not contain it.
            resolved = []
            for descriptor in descriptors:
                if descriptor in source:
                    resolved.append(source.index(descriptor))
                else:
                    resolved.append(descriptor.compute_value)
            return resolved

        self.source = source

        self.attributes = resolve(destination.attributes)
        self.class_vars = resolve(destination.class_vars)
        self.variables = self.attributes + self.class_vars
        self.metas = resolve(destination.metas)
||
58 | |||
59 | |||
class Domain:
    """
    Descriptor of a data domain: features, class variables and metas.

    .. attribute:: attributes

        Tuple of feature descriptors.

    .. attribute:: class_vars

        Tuple of class (target) variable descriptors.

    .. attribute:: class_var

        The single class variable, or `None` if the domain has no class
        variable or more than one.

    .. attribute:: variables

        Read-only tuple of attributes followed by class variables.

    .. attribute:: metas

        Read-only tuple of meta attribute descriptors.

    .. attribute:: anonymous

        `False` by default; set to `True` by :meth:`from_numpy`.
    """

    def __init__(self, attributes, class_vars=None, metas=None, source=None):
        """
        Initialize a new domain descriptor. Arguments give the features and
        the class attribute(s). They can be described by descriptors
        (instances of :class:`Variable`), or by indices or names if the
        source domain is given.

        :param attributes: a list of attributes
        :type attributes: list of :class:`Variable`
        :param class_vars: target variable or a list of target variables
        :type class_vars: :class:`Variable` or list of :class:`Variable`
        :param metas: a list of meta attributes
        :type metas: list of :class:`Variable`
        :param source: the source domain for attributes
        :type source: Orange.data.Domain
        :raises TypeError: if an element is neither a :class:`Variable` nor
            a name/index resolvable through `source`, or if any attribute
            or class variable is not primitive
        """
        if class_vars is None:
            class_vars = []
        elif isinstance(class_vars, (Variable, Integral, str)):
            class_vars = [class_vars]
        elif isinstance(class_vars, Iterable):
            class_vars = list(class_vars)

        # Always work on a copy: names and indices are replaced in place
        # below, and the caller's list must not be modified.
        attributes = list(attributes)
        metas = list(metas) if metas else []

        # Replace names and indices with descriptors if 'source' is given;
        # complain otherwise. The test is `is not None` because a source
        # domain without variables (metas only) has length 0 and is falsy.
        for lst in (attributes, class_vars, metas):
            for i, var in enumerate(lst):
                if isinstance(var, Variable):
                    continue
                if source is not None and isinstance(var, (str, Integral)):
                    lst[i] = source[var]
                else:
                    raise TypeError(
                        "descriptors must be instances of Variable, "
                        "not '%s'" % type(var).__name__)

        # Store everything
        self.attributes = tuple(attributes)
        self.class_vars = tuple(class_vars)
        self._variables = self.attributes + self.class_vars
        self._metas = tuple(metas)
        self.class_var = \
            self.class_vars[0] if len(self.class_vars) == 1 else None
        if not all(var.is_primitive() for var in self._variables):
            raise TypeError("variables must be primitive")

        # Map descriptor, name and position to the position. Variables get
        # non-negative positions; metas get -1, -2, ... (i.e. -1 - idx).
        self._indices = dict(chain.from_iterable(
            ((var, idx), (var.name, idx), (idx, idx))
            for idx, var in enumerate(self._variables)))
        self._indices.update(chain.from_iterable(
            ((var, -1 - idx), (var.name, -1 - idx), (-1 - idx, -1 - idx))
            for idx, var in enumerate(self.metas)))

        self.anonymous = False
        # Cache of conversions keyed by source domain; weak keys so that
        # the cache does not keep source domains alive.
        self._known_domains = weakref.WeakKeyDictionary()
        self._last_conversion = None

    @staticmethod
    def _name_places(count):
        """
        Return the number of digits used to number `count` generated names;
        0 means the names carry no number (zero or one name).
        """
        # len(str(...)) is exact; int(log(count, 10)) + 1 is off by one for
        # e.g. 1000 because of floating-point error, and fails for 0.
        return 0 if count <= 1 else len(str(count))

    @staticmethod
    def _numbered_name(base, index, places):
        """
        Return `base` alone if `places` is 0, otherwise `base` followed by
        the 1-based, zero-padded number for the 0-based `index`.
        """
        if not places:
            return base
        return "{} {:0{}}".format(base, index + 1, places)

    # noinspection PyPep8Naming
    @classmethod
    def from_numpy(cls, X, Y=None, metas=None):
        """
        Create a domain corresponding to the given numpy arrays. This method
        is usually invoked from :meth:`Orange.data.Table.from_numpy`.

        All attributes are assumed to be continuous and are named
        "Feature <n>". Target variables are discrete if all values are
        integers between 0 and 19; otherwise they are continuous. Discrete
        targets are named "Class <n>" and continuous are named "Target <n>".
        Numbering starts at 1; when there is only one variable of a kind,
        the number is omitted. Meta attributes are string variables named
        "Meta <n>". Domain is marked as :attr:`anonymous`, so data from any
        other domain of the same shape can be converted into this one and
        vice-versa.

        :param `numpy.ndarray` X: 2-dimensional array with data
        :param Y: 1- or 2-dimensional data for target
        :type Y: `numpy.ndarray` or None
        :param `numpy.ndarray` metas: meta attributes
        :type metas: `numpy.ndarray` or None
        :return: a new domain
        :rtype: :class:`Domain`
        :raises ValueError: if `X` is not 2-dimensional or `Y` is neither
            1- nor 2-dimensional
        """
        if X.ndim != 2:
            raise ValueError('X must be a 2-dimensional array')
        n_attrs = X.shape[1]
        places = cls._name_places(n_attrs)
        attr_vars = [
            ContinuousVariable(name=cls._numbered_name("Feature", a, places))
            for a in range(n_attrs)]

        class_vars = []
        if Y is not None:
            if Y.ndim == 1:
                Y = Y.reshape(len(Y), 1)
            elif Y.ndim != 2:
                raise ValueError('Y has invalid shape')
            n_classes = Y.shape[1]
            places = cls._name_places(n_classes)
            for i, class_ in enumerate(Y.T):
                mn, mx = np.min(class_), np.max(class_)
                # Cheap range test first; the exact per-value check runs
                # only on the unique values of plausible columns.
                if 0 <= mn <= mx <= 20:
                    values = np.unique(class_)
                    if all(int(x) == x and 0 <= x <= 19 for x in values):
                        mx = int(mx)
                        val_places = 1 + (mx >= 10)
                        values = ["v%*i" % (val_places, v + 1)
                                  for v in range(mx + 1)]
                        name = cls._numbered_name("Class", i, places)
                        class_vars.append(DiscreteVariable(name, values))
                        continue
                # _numbered_name already converts the 0-based column index
                # to a 1-based number, so the index is passed unchanged.
                class_vars.append(ContinuousVariable(
                    name=cls._numbered_name("Target", i, places)))

        if metas is not None:
            n_metas = metas.shape[1]
            places = cls._name_places(n_metas)
            meta_vars = [StringVariable(cls._numbered_name("Meta", m, places))
                         for m in range(n_metas)]
        else:
            meta_vars = []

        domain = cls(attr_vars, class_vars, meta_vars)
        domain.anonymous = True
        return domain

    @property
    def variables(self):
        """Tuple of attributes followed by class variables."""
        return self._variables

    @property
    def metas(self):
        """Tuple of meta attributes."""
        return self._metas

    def __len__(self):
        """The number of variables (features and class attributes)."""
        return len(self._variables)

    def __getitem__(self, idx):
        """
        Return a variable descriptor from the given argument, which can be
        a descriptor, index or name. If `idx` is a descriptor, the lookup
        goes through the same table and returns the stored descriptor.
        A slice returns the corresponding tuple of variables (metas are
        not included).

        :param idx: index, name, descriptor or slice
        :type idx: int, str, :class:`Variable` or slice
        :return: the variable described by `idx`
        :rtype: :class:`Variable`
        :raises KeyError: if `idx` is not in the domain
        """
        if isinstance(idx, slice):
            return self._variables[idx]

        idx = self._indices[idx]
        if idx >= 0:
            return self.variables[idx]
        # Negative positions denote metas: -1 is the first meta, and so on.
        return self.metas[-1 - idx]

    def __contains__(self, item):
        """
        Return `True` if the item (`str`, `int`, :class:`Variable`) is
        in the domain.
        """
        return item in self._indices

    def __iter__(self):
        """
        Return an iterator through variables (features and class attributes).
        """
        return iter(self._variables)

    def __str__(self):
        """
        Return a list-like string with the domain's features, class attributes
        and meta attributes, e.g. ``[a, b | c] {m}``.
        """
        s = "[" + ", ".join(attr.name for attr in self.attributes)
        if self.class_vars:
            s += " | " + ", ".join(cls.name for cls in self.class_vars)
        s += "]"
        if self._metas:
            s += " {" + ", ".join(meta.name for meta in self._metas) + "}"
        return s

    __repr__ = __str__

    def __getstate__(self):
        """Return the instance state without the weak-keyed conversion cache."""
        state = self.__dict__.copy()
        state.pop("_known_domains", None)
        return state

    def __setstate__(self, state):
        """Restore the state and start with an empty conversion cache."""
        self.__dict__.update(state)
        self._known_domains = weakref.WeakKeyDictionary()

    def index(self, var):
        """
        Return the index of the given variable or meta attribute, represented
        with an instance of :class:`Variable`, `int` or `str`. Indices of
        meta attributes are negative (-1 for the first meta).

        :raises ValueError: if `var` is not in the domain
        """
        try:
            return self._indices[var]
        except KeyError:
            raise ValueError("'%s' is not in domain" % var) from None

    def has_discrete_attributes(self, include_class=False):
        """
        Return `True` if domain has any discrete attributes. If `include_class`
        is set, the check includes the class attribute(s).
        """
        checked = self.variables if include_class else self.attributes
        return any(var.is_discrete for var in checked)

    def has_continuous_attributes(self, include_class=False):
        """
        Return `True` if domain has any continuous attributes. If
        `include_class` is set, the check includes the class attribute(s).
        """
        checked = self.variables if include_class else self.attributes
        return any(var.is_continuous for var in checked)

    @property
    def has_continuous_class(self):
        """`True` if the domain has a single class variable that is continuous."""
        return bool(self.class_var and self.class_var.is_continuous)

    @property
    def has_discrete_class(self):
        """`True` if the domain has a single class variable that is discrete."""
        return bool(self.class_var and self.class_var.is_discrete)

    def get_conversion(self, source):
        """
        Return an instance of :class:`DomainConversion` for conversion from the
        given source domain to this domain. Domain conversions are cached to
        speed-up the conversion in the common case in which the domain
        is based on another domain, for instance, when the domain contains
        discretized variables from another domain.

        :param source: the source domain
        :type source: Orange.data.Domain
        """
        # The last conversion is read once into a local, so the check and
        # the return below use the same object.
        # NOTE(review): the original comment calls this method thread-safe;
        # that is not verified here.
        c = self._last_conversion
        if c and c.source is source:
            return c
        c = self._known_domains.get(source, None)
        if not c:
            c = DomainConversion(source, self)
            self._known_domains[source] = self._last_conversion = c
        return c

    # noinspection PyProtectedMember
    def convert(self, inst):
        """
        Convert a data instance from another domain to this domain.

        :param inst: The data instance to be converted; either an
            `Instance` or a sequence of values (optionally followed by
            values of meta attributes)
        :return: The data instance in this domain, as a tuple of three
            arrays: attribute values, class values and meta values
        :raises ValueError: if a sequence has neither as many elements as
            there are variables nor as many as variables plus metas
        """
        from .instance import Instance

        if isinstance(inst, Instance):
            if inst.domain == self:
                return inst._x, inst._y, inst._metas
            c = self.get_conversion(inst.domain)
            n_source_attrs = len(inst.domain.attributes)

            def fetch(entry):
                # Integer entries are positions in the source instance:
                # attributes first, then class values, negatives for metas.
                if isinstance(entry, int):
                    if 0 <= entry < n_source_attrs:
                        return inst._x[entry]
                    if entry >= n_source_attrs:
                        return inst._y[entry - n_source_attrs]
                    return inst._metas[-entry - 1]
                # Otherwise the entry is the variable's compute_value; a
                # false one means the value cannot be computed.
                return Unknown if not entry else entry(inst)

            values = [fetch(entry) for entry in c.variables]
            metas = [fetch(entry) for entry in c.metas]
        else:
            nvars = len(self._variables)
            nmetas = len(self._metas)
            if len(inst) != nvars and len(inst) != nvars + nmetas:
                raise ValueError("invalid data length for domain")
            values = [var.to_val(val)
                      for var, val in zip(self._variables, inst)]
            if len(inst) == nvars + nmetas:
                metas = [var.to_val(val)
                         for var, val in zip(self._metas, inst[nvars:])]
            else:
                metas = [var.Unknown for var in self._metas]
        nattrs = len(self.attributes)
        # Let np.array decide dtype for values
        return np.array(values[:nattrs]), np.array(values[nattrs:]),\
            np.array(metas, dtype=object)

    def select_columns(self, col_idx):
        """
        Return a domain with the columns selected by `col_idx`.

        Selected variables keep their role: those at positions below the
        number of attributes become attributes, those at higher positions
        become class variables, and those at negative positions become
        metas. If the selection covers everything (see
        :meth:`_compute_col_indices`), the domain itself is returned.

        :param col_idx: `...`, a boolean array, a slice, an iterable of
            indices/names/descriptors, or a single index/name/descriptor
        :rtype: :class:`Domain`
        """
        attributes, col_indices = self._compute_col_indices(col_idx)
        if attributes is None:
            return self

        n_attrs = len(self.attributes)
        r_attrs = [attributes[i]
                   for i, col in enumerate(col_indices)
                   if 0 <= col < n_attrs]
        r_classes = [attributes[i]
                     for i, col in enumerate(col_indices)
                     if col >= n_attrs]
        r_metas = [attributes[i]
                   for i, col in enumerate(col_indices) if col < 0]
        return Domain(r_attrs, r_classes, r_metas)

    def _compute_col_indices(self, col_idx):
        """
        Translate a column selector into descriptors and positions.

        :return: a pair `(variables, indices)`, where `indices` is a
            1-dimensional array of positions, or `(None, None)` if the
            selector is `...` or a slice covering all variables
        """
        if col_idx is ...:
            return None, None
        if isinstance(col_idx, np.ndarray) and col_idx.dtype == bool:
            # np.nonzero returns a tuple with one array per dimension; take
            # the array itself so callers can iterate over the positions.
            return ([attr for attr, c in zip(self, col_idx) if c],
                    np.nonzero(col_idx)[0])
        elif isinstance(col_idx, slice):
            s = len(self.variables)
            start, end, stride = col_idx.indices(s)
            if (start, end, stride) == (0, s, 1):
                return None, None
            return (self.variables[col_idx],
                    np.arange(start, end, stride))
        elif isinstance(col_idx, Iterable) and not isinstance(col_idx, str):
            attributes = [self[col] for col in col_idx]
            # NOTE(review): this compares a list with the `attributes`
            # tuple, which is never equal, so the shortcut does not fire.
            # Left unchanged because enabling it would alter the result.
            if attributes == self.attributes:
                return None, None
            return attributes, np.fromiter(
                (self.index(attr) for attr in attributes), int)
        elif isinstance(col_idx, Integral):
            attr = self[col_idx]
        else:
            # A name or descriptor: resolve it, then find its position.
            attr = self[col_idx]
            col_idx = self.index(attr)
        return [attr], np.array([col_idx])

    def checksum(self):
        """Return the hash of the domain."""
        return hash(self)

    def __eq__(self, other):
        """
        Domains are equal if they have equal attributes, class variables
        and metas. Comparison with a non-domain gives `False`.
        """
        if not isinstance(other, Domain):
            return False

        return (self.attributes == other.attributes and
                self.class_vars == other.class_vars and
                self.metas == other.metas)

    def __hash__(self):
        """Combine the hashes of attributes, class variables and metas."""
        return hash(self.attributes) ^ hash(self.class_vars) ^ hash(self.metas)
||
422 |