1 | import os |
||
2 | import zlib |
||
3 | from collections import MutableSequence, Iterable, Sequence, Sized |
||
4 | from itertools import chain |
||
5 | from numbers import Real, Integral |
||
6 | import operator |
||
7 | from functools import reduce |
||
8 | from warnings import warn |
||
9 | from threading import Lock |
||
10 | import tempfile |
||
11 | import urllib.parse |
||
12 | import urllib.request |
||
13 | |||
14 | import bottlechest as bn |
||
0 ignored issues
–
show
|
|||
15 | from scipy import sparse as sp |
||
0 ignored issues
–
show
The import `scipy` could not be resolved.
This can be caused by one of the following:
1. Missing dependencies. This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands:
# .scrutinizer.yml
before_commands:
- sudo pip install abc # Python2
- sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint; when installing your modules, make sure to use the command for the correct version.
2. Missing `__init__.py` files. This error could also result from missing `__init__.py` files. |
|||
16 | |||
17 | from .instance import * |
||
0 ignored issues
–
show
|
|||
18 | from Orange.util import flatten |
||
19 | from Orange.data import (Domain, io, Variable, StringVariable) |
||
20 | from Orange.data.storage import Storage |
||
21 | from . import _contingency |
||
0 ignored issues
–
show
|
|||
22 | from . import _valuecount |
||
0 ignored issues
–
show
|
|||
23 | |||
24 | |||
def get_sample_datasets_dir():
    """
    Return the resolved path of the ``datasets`` directory located one level
    above the directory that contains this module.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.realpath(os.path.join(module_dir, '..', 'datasets'))


# Directories tried, in order, when a table is loaded by file name. The empty
# string leaves the given name untouched when joined with it; the second
# entry is the sample dataset directory computed above.
dataset_dirs = ['', get_sample_datasets_dir()]
||
32 | |||
33 | |||
class RowInstance(Instance):
    """
    A data instance that represents one row of a table.

    The instance keeps a reference to the table and the row index. Values
    written through the instance are stored in the instance's own row
    arrays and, for table parts stored as scipy sparse matrices, are also
    written back to the table explicitly (the instance then only holds a
    dense copy of the row).
    """
    # Original sparse rows of the corresponding table parts. Each stays
    # `None` unless that part of the table is a scipy sparse matrix.
    sparse_x = None
    sparse_y = None
    sparse_metas = None
    _weight = None

    def __init__(self, table, row_index):
        """
        Construct a data instance representing the given row of the table.

        :param table: the table that contains the row
        :param row_index: index of the row within the table
        """
        # NOTE(review): the base-class __init__ is not called here, as in
        # the original code; confirm that `Instance` needs no initialization.
        self.table = table
        self._domain = table.domain
        self.row_index = row_index
        self.id = table.ids[row_index]
        self._x = table.X[row_index]
        if sp.issparse(self._x):
            self.sparse_x = self._x
            self._x = np.asarray(self._x.todense())[0]
        self._y = table._Y[row_index]
        if sp.issparse(self._y):
            self.sparse_y = self._y
            self._y = np.asarray(self._y.todense())[0]
        self._metas = table.metas[row_index]
        if sp.issparse(self._metas):
            self.sparse_metas = self._metas
            self._metas = np.asarray(self._metas.todense())[0]

    @property
    def weight(self):
        """
        Weight of the row; 1 if the table reports that it has no weights.
        """
        if not self.table.has_weights():
            return 1
        return self.table.W[self.row_index]

    @weight.setter
    def weight(self, weight):
        # Ask the table to create its weights before the first write.
        if not self.table.has_weights():
            self.table.set_weights()
        self.table.W[self.row_index] = weight

    def set_class(self, value):
        """
        Set the value of the (single) class variable.

        Values that are not real numbers are first converted with the class
        variable's `to_val`.
        """
        self._check_single_class()
        if not isinstance(value, Real):
            value = self.table.domain.class_var.to_val(value)
        self._y[0] = value
        # Compare with None: the truth value of a scipy sparse matrix raises
        # ValueError unless its shape is 1x1, and for 1x1 it depends on the
        # stored value rather than on whether the table part is sparse.
        if self.sparse_y is not None:
            self.table._Y[self.row_index, 0] = value

    def __setitem__(self, key, value):
        """
        Set the value of a variable.

        Keys that are not integers are resolved with the domain's `index`.
        Non-negative indices address attributes followed by class variables;
        negative indices address meta attributes (index `-1 - key`). String
        values are converted with the variable's `to_val`.

        :raises TypeError: if a value for an attribute or class variable is
            not a real number after conversion
        """
        if not isinstance(key, Integral):
            key = self._domain.index(key)
        if isinstance(value, str):
            var = self._domain[key]
            value = var.to_val(value)
        if key >= 0:
            if not isinstance(value, Real):
                raise TypeError("Expected primitive value, got '%s'" %
                                type(value).__name__)
            n_attrs = len(self._x)
            if key < n_attrs:
                self._x[key] = value
                if self.sparse_x is not None:
                    self.table.X[self.row_index, key] = value
            else:
                self._y[key - n_attrs] = value
                if self.sparse_y is not None:
                    self.table._Y[self.row_index, key - n_attrs] = value
        else:
            self._metas[-1 - key] = value
            if self.sparse_metas is not None:
                self.table.metas[self.row_index, -1 - key] = value

    def _str(self, limit):
        """
        Format the row as ``[attributes | classes] {metas}``.

        :param limit: if true, the output is abbreviated; for sparse parts
            at most five stored entries are shown, followed by ``...``
        """
        def sp_values(matrix, variables):
            # Dense parts are formatted by the base class.
            if not sp.issparse(matrix):
                return Instance.str_values(matrix[row], variables, limit)
            # Sparse parts: list only the entries stored for this row
            # (uses `indptr`/`indices`/`data` of the matrix).
            begptr, endptr = matrix.indptr[row:row + 2]
            rendptr = endptr if not limit else min(endptr, begptr + 5)
            variables = [variables[var]
                         for var in matrix.indices[begptr:rendptr]]
            s = ", ".join(
                "{}={}".format(var.name, var.str_val(val))
                for var, val in zip(variables, matrix.data[begptr:rendptr]))
            if limit and rendptr != endptr:
                s += ", ..."
            return s

        table = self.table
        domain = table.domain
        row = self.row_index
        s = "[" + sp_values(table.X, domain.attributes)
        if domain.class_vars:
            s += " | " + sp_values(table._Y, domain.class_vars)
        s += "]"
        if self._domain.metas:
            s += " {" + sp_values(table.metas, domain.metas) + "}"
        return s

    def __str__(self):
        return self._str(False)

    def __repr__(self):
        return self._str(True)
||
135 | |||
136 | |||
class Columns:
    """
    Namespace whose attributes are the variables of a domain.

    Every variable obtained by iterating over the domain, followed by the
    domain's meta attributes, is stored under its name with spaces replaced
    by underscores.
    """
    def __init__(self, domain):
        for variable in chain(domain, domain.metas):
            attr_name = variable.name.replace(" ", "_")
            setattr(self, attr_name, variable)
||
141 | |||
142 | |||
143 | # noinspection PyPep8Naming |
||
144 | class Table(MutableSequence, Storage): |
||
145 | __file__ = None |
||
146 | |||
@property
def columns(self):
    """
    An object whose attributes are the variable descriptors of the table's
    columns. For a table `table`, setting `c = table.columns` allows
    accessing the table's variables with, for instance, `c.gender`, `c.age`
    etc. Spaces in variable names are replaced with underscores.
    """
    column_namespace = Columns(self.domain)
    return column_namespace
||
156 | |||
157 | _next_instance_id = 0 |
||
158 | _next_instance_lock = Lock() |
||
159 | |||
@property
def Y(self):
    """
    Class values of the table.

    The values are stored in the two-dimensional `_Y`. When `_Y` has
    exactly one column, that column is returned as a one-dimensional
    selection; otherwise `_Y` itself is returned.
    """
    stored = self._Y
    return stored[:, 0] if stored.shape[1] == 1 else stored

@Y.setter
def Y(self, value):
    # One-dimensional input is stored as a single column.
    self._Y = value[:, None] if len(value.shape) == 1 else value
||
0 ignored issues
–
show
|
|||
171 | |||
def __new__(cls, *args, **kwargs):
    """
    Create a table, dispatching on the type of the first argument.

    - no arguments at all: a bare, uninitialized instance;
    - a string (or the keyword `filename`): `from_url` if it starts with
      ``http://`` or ``https://``, otherwise `from_file`;
    - a `Table`: `from_table` with that table's domain;
    - a `Domain` alone: `from_domain`;
    - a `Domain` followed by a `Table`: `from_table`;
    - a `Domain` followed by a list: `from_list`;
    - anything else: `from_numpy`, with the leading `Domain` (if given)
      used as the domain and `None` otherwise.

    :raises TypeError: if only keyword arguments other than `filename`
        are given
    """
    if not args and not kwargs:
        return super().__new__(cls)

    if 'filename' in kwargs:
        args = [kwargs.pop('filename')]

    if not args:
        raise TypeError(
            "Table takes at least 1 positional argument (0 given)")

    first = args[0]
    if isinstance(first, str):
        if first.startswith(('https://', 'http://')):
            return cls.from_url(first, **kwargs)
        return cls.from_file(first, **kwargs)
    elif isinstance(first, Table):
        return cls.from_table(first.domain, first)
    elif isinstance(first, Domain):
        domain, args = first, args[1:]
        if not args:
            return cls.from_domain(domain, **kwargs)
        if isinstance(args[0], Table):
            return cls.from_table(domain, *args)
        elif isinstance(args[0], list):
            return cls.from_list(domain, *args)
    else:
        domain = None

    # Remaining positional arguments are treated as arrays.
    return cls.from_numpy(domain, *args, **kwargs)
||
202 | |||
@classmethod
def from_domain(cls, domain, n_rows=0, weights=False):
    """
    Construct a new `Table` with the given number of rows for the given
    domain. Attribute and class values are zero-filled; the array of meta
    attributes is allocated with dtype `object` and left unfilled. If
    weights are requested, they are initialized to 1's; otherwise the
    weight array has zero columns.

    :param domain: domain for the `Table`
    :type domain: Orange.data.Domain
    :param n_rows: number of rows in the new table
    :type n_rows: int
    :param weights: indicates whether to construct a vector of weights
    :type weights: bool
    :return: a new table
    :rtype: Orange.data.Table
    """
    table = cls()
    table.domain = domain
    table.n_rows = n_rows
    table.X = np.zeros((n_rows, len(domain.attributes)))
    table.Y = np.zeros((n_rows, len(domain.class_vars)))
    table.W = np.ones(n_rows) if weights else np.empty((n_rows, 0))
    table.metas = np.empty((n_rows, len(table.domain.metas)), object)
    cls._init_ids(table)
    return table
||
231 | |||
232 | conversion_cache = None |
||
233 | |||
@classmethod
def from_table(cls, domain, source, row_indices=...):
    """
    Create a new table from selected columns and/or rows of an existing
    one. The columns are chosen using a domain. The domain may also include
    variables that do not appear in the source table; they are computed
    from source variables if possible.

    The resulting data may be a view or a copy of the existing data.

    While the outermost call is running, results are memoized in
    `Table.conversion_cache` under the identities of `domain` and
    `source`, so that nested conversions of the same pair are not
    repeated; the cache is discarded when the outermost call finishes.

    :param domain: the domain for the new table
    :type domain: Orange.data.Domain
    :param source: the source table
    :type source: Orange.data.Table
    :param row_indices: indices of the rows to include
    :type row_indices: a slice or a sequence
    :return: a new table
    :rtype: Orange.data.Table
    """

    def get_columns(row_indices, src_cols, n_rows, dtype=np.float64):
        # `src_cols` describes where each target column comes from: a
        # non-negative int below the number of source attributes selects a
        # column of X, larger ints select columns of _Y, negative ints
        # select metas (column -1 - x), None yields Unknown, and any other
        # object is called with the source table to compute the column.
        if not len(src_cols):
            return np.zeros((n_rows, 0), dtype=source.X.dtype)

        n_src_attrs = len(source.domain.attributes)
        # Fast paths: all columns come from the same part of the source.
        if all(isinstance(x, Integral) and 0 <= x < n_src_attrs
               for x in src_cols):
            return _subarray(source.X, row_indices, src_cols)
        if all(isinstance(x, Integral) and x < 0 for x in src_cols):
            arr = _subarray(source.metas, row_indices,
                            [-1 - x for x in src_cols])
            if arr.dtype != dtype:
                return arr.astype(dtype)
            return arr
        if all(isinstance(x, Integral) and x >= n_src_attrs
               for x in src_cols):
            return _subarray(source._Y, row_indices,
                             [x - n_src_attrs for x in src_cols])

        # General case: assemble the result column by column.
        a = np.empty((n_rows, len(src_cols)), dtype=dtype)
        for i, col in enumerate(src_cols):
            if col is None:
                a[:, i] = Unknown
            elif not isinstance(col, Integral):
                if row_indices is not ...:
                    a[:, i] = col(source)[row_indices]
                else:
                    a[:, i] = col(source)
            elif col < 0:
                a[:, i] = source.metas[row_indices, -1 - col]
            elif col < n_src_attrs:
                a[:, i] = source.X[row_indices, col]
            else:
                a[:, i] = source._Y[row_indices, col - n_src_attrs]
        return a

    # Only the outermost call owns (creates and later drops) the cache.
    new_cache = Table.conversion_cache is None
    try:
        if new_cache:
            Table.conversion_cache = {}
        else:
            # NOTE(review): the key does not include `row_indices`, so a
            # nested call with different rows gets the cached table.
            cached = Table.conversion_cache.get((id(domain), id(source)))
            # Compare with None so that an empty cached table is a hit too.
            if cached is not None:
                return cached
        if domain == source.domain:
            return cls.from_table_rows(source, row_indices)

        if isinstance(row_indices, slice):
            # The length of the equivalent range is the exact number of
            # selected rows for any start, stop and (negative) step.
            n_rows = len(range(*row_indices.indices(source.X.shape[0])))
        elif row_indices is ...:
            # shape[0] rather than len(): len() is not defined for sparse X.
            n_rows = source.X.shape[0]
        else:
            n_rows = len(row_indices)

        self = cls()
        self.domain = domain
        conversion = domain.get_conversion(source.domain)
        self.X = get_columns(row_indices, conversion.attributes, n_rows)
        if self.X.ndim == 1:
            self.X = self.X.reshape(-1, len(self.domain.attributes))
        self.Y = get_columns(row_indices, conversion.class_vars, n_rows)

        # Metas holding strings cannot be stored as floats.
        dtype = np.float64
        if any(isinstance(var, StringVariable) for var in domain.metas):
            dtype = object
        self.metas = get_columns(row_indices, conversion.metas,
                                 n_rows, dtype)
        if self.metas.ndim == 1:
            self.metas = self.metas.reshape(-1, len(self.domain.metas))
        if source.has_weights():
            self.W = np.array(source.W[row_indices])
        else:
            self.W = np.empty((n_rows, 0))
        self.name = getattr(source, 'name', '')
        if hasattr(source, 'ids'):
            self.ids = np.array(source.ids[row_indices])
        else:
            cls._init_ids(self)
        Table.conversion_cache[(id(domain), id(source))] = self
        return self
    finally:
        if new_cache:
            Table.conversion_cache = None
||
342 | |||
@classmethod
def from_table_rows(cls, source, row_indices):
    """
    Construct a new table by selecting rows from the source table. The
    new table shares the source's domain and copies its name (an empty
    string if the source has none).

    :param source: an existing table
    :type source: Orange.data.Table
    :param row_indices: indices of the rows to include
    :type row_indices: a slice or a sequence
    :return: a new table
    :rtype: Orange.data.Table
    """
    def two_dimensional(block, variables):
        # Indexing may produce a one-dimensional result; restore the
        # column dimension from the number of variables.
        if block.ndim == 1:
            return block.reshape(-1, len(variables))
        return block

    table = cls()
    table.domain = source.domain
    table.X = two_dimensional(source.X[row_indices],
                              table.domain.attributes)
    table.Y = source._Y[row_indices]
    table.metas = two_dimensional(source.metas[row_indices],
                                  table.domain.metas)
    table.W = source.W[row_indices]
    table.name = getattr(source, 'name', '')
    table.ids = np.array(source.ids[row_indices])
    return table
||
369 | |||
@classmethod
def from_numpy(cls, domain, X, Y=None, metas=None, W=None):
    """
    Construct a table from numpy arrays with the given domain. The number
    of variables in the domain must match the number of columns in the
    corresponding arrays. All arrays must have the same number of rows.
    Arrays may be of different numpy types, and may be dense or sparse.

    If `domain` is `None`, it is constructed with `Domain.from_numpy`.
    If `Y` is not given and `X` is dense, the columns of `X` beyond the
    number of attributes in the domain are used as class values.

    :param domain: the domain for the new table
    :type domain: Orange.data.Domain
    :param X: array with attribute values
    :type X: np.array
    :param Y: array with class values
    :type Y: np.array
    :param metas: array with meta attributes
    :type metas: np.array
    :param W: array with weights
    :type W: np.array
    :return: a new table
    :rtype: Orange.data.Table
    :raises ValueError: if the number of columns of an array does not
        match the domain or the arrays differ in the number of rows
    """
    X, Y, W = _check_arrays(X, Y, W, dtype='float64')
    metas, = _check_arrays(metas)

    if Y is not None and Y.ndim == 1:
        Y = Y.reshape(Y.shape[0], 1)
    if domain is None:
        domain = Domain.from_numpy(X, Y, metas)

    if Y is None:
        if sp.issparse(X):
            Y = np.empty((X.shape[0], 0), object)
        else:
            # Split the class columns off the end of X.
            Y = X[:, len(domain.attributes):]
            X = X[:, :len(domain.attributes)]
    if metas is None:
        metas = np.empty((X.shape[0], 0), object)
    if W is None or W.size == 0:
        W = np.empty((X.shape[0], 0))
    else:
        # Weights are kept one-dimensional, as in from_domain and
        # from_list; a two-dimensional W denotes "no weights" (0 columns).
        W = W.reshape(W.size)

    if X.shape[1] != len(domain.attributes):
        raise ValueError(
            "Invalid number of variable columns ({} != {})".format(
                X.shape[1], len(domain.attributes))
        )
    if Y.shape[1] != len(domain.class_vars):
        raise ValueError(
            "Invalid number of class columns ({} != {})".format(
                Y.shape[1], len(domain.class_vars))
        )
    if metas.shape[1] != len(domain.metas):
        raise ValueError(
            "Invalid number of meta attribute columns ({} != {})".format(
                metas.shape[1], len(domain.metas))
        )
    if not X.shape[0] == Y.shape[0] == metas.shape[0] == W.shape[0]:
        raise ValueError(
            "Parts of data contain different numbers of rows.")

    self = cls()
    self.domain = domain
    self.X = X
    self.Y = Y
    self.metas = metas
    self.W = W
    self.n_rows = self.X.shape[0]
    cls._init_ids(self)
    return self
||
440 | |||
@classmethod
def from_list(cls, domain, rows, weights=None):
    """
    Construct a table from a list of rows.

    Each row lists values for the attributes, then the class variables,
    then the meta attributes of the domain; every value is converted with
    the corresponding variable's `to_val`. Rows that are `Instance`
    objects contribute their `list`. Values missing at the end of a row
    are left as initialized by `from_domain`.

    :param domain: the domain for the new table
    :type domain: Orange.data.Domain
    :param rows: data rows
    :type rows: list
    :param weights: optional weights, one per row
    :return: a new table
    :rtype: Orange.data.Table
    :raises ValueError: if the number of weights differs from the number
        of rows
    """
    has_weights = weights is not None
    if has_weights and len(rows) != len(weights):
        raise ValueError("mismatching number of instances and weights")

    table = cls.from_domain(domain, len(rows), has_weights)
    n_attrs = len(domain.attributes)
    n_classes = len(domain.class_vars)
    for row_no, row in enumerate(rows):
        values = row.list if isinstance(row, Instance) else row
        for col_no, (variable, value) in enumerate(
                zip(domain.attributes, values)):
            table.X[row_no, col_no] = variable.to_val(value)
        for col_no, (variable, value) in enumerate(
                zip(domain.class_vars, values[n_attrs:])):
            table._Y[row_no, col_no] = variable.to_val(value)
        for col_no, (variable, value) in enumerate(
                zip(domain.metas, values[n_attrs + n_classes:])):
            table.metas[row_no, col_no] = variable.to_val(value)
    if has_weights:
        table.W = np.array(weights)
    return table
||
461 | |||
@classmethod
def _init_ids(cls, obj):
    """
    Assign a block of consecutive, previously unused ids to the rows of
    `obj` and advance the class-wide id counter past them.

    :param obj: the table to receive `ids`; its row count is taken from
        `obj.X.shape[0]`
    """
    with cls._next_instance_lock:
        first_id = cls._next_instance_id
        n_rows = obj.X.shape[0]
        # arange builds the ids without iterating in Python and yields an
        # integer array for an empty table too (np.array over an empty
        # range would produce a float array).
        obj.ids = np.arange(first_id, first_id + n_rows)
        cls._next_instance_id = first_id + n_rows
||
467 | |||
@classmethod
def new_id(cls):
    """
    Return the next unused instance id and advance the class-wide counter
    by one. The counter is read and updated while holding
    `_next_instance_lock`.
    """
    with cls._next_instance_lock:
        fresh_id = cls._next_instance_id
        cls._next_instance_id = fresh_id + 1
    return fresh_id
||
474 | |||
475 | FILE_FORMATS = { |
||
476 | ".tab": (io.TabDelimFormat, ) |
||
477 | } |
||
478 | |||
def save(self, filename):
    """
    Save a data table to a file. The path can be absolute or relative.
    The writer is chosen by the file name extension.

    :param filename: File name
    :type filename: str
    :raises IOError: if no writer is registered for the extension
    """
    _, ext = os.path.splitext(filename)
    writer = io.FileFormats.writers.get(ext)
    if writer:
        writer().write_file(filename, self)
        return

    # No writer: report whether the format is known at all.
    desc = io.FileFormats.names.get(ext)
    if not desc:
        raise IOError("Unknown file name extension.")
    raise IOError("Writing of {}s is not supported".
                  format(desc.lower()))
||
496 | |||
@classmethod
def from_file(cls, filename):
    """
    Read a data table from a file. The path can be absolute or relative.

    The file is looked up in each of `dataset_dirs` in turn. If the name
    has no extension, the extensions of the registered readers are tried
    as well. The reader is chosen by the extension.

    :param filename: File name
    :type filename: str
    :return: a new data table
    :rtype: Orange.data.Table
    :raises IOError: if the file is not found or no reader is registered
        for its extension
    """
    for directory in dataset_dirs:
        ext = os.path.splitext(filename)[1]
        absolute_filename = os.path.join(directory, filename)
        if not ext:
            # Try the known extensions; `ext` keeps the one that matched.
            for ext in io.FileFormats.readers:
                candidate = absolute_filename + ext
                if os.path.exists(candidate):
                    absolute_filename = candidate
                    break
        if os.path.exists(absolute_filename):
            break
    else:
        # Not found in any directory.
        absolute_filename = ext = ""

    if not os.path.exists(absolute_filename):
        raise IOError('File "{}" was not found.'.format(filename))

    reader = io.FileFormats.readers.get(ext)
    if not reader:
        desc = io.FileFormats.names.get(ext)
        if not desc:
            raise IOError("Unknown file name extension.")
        raise IOError("Reading {}s is not supported".
                      format(desc.lower()))

    data = reader().read_file(absolute_filename, cls)
    # The table is named after the file, without directory and extension.
    data.name = os.path.splitext(os.path.basename(filename))[0]
    # There is no need to call _init_ids here: the functions from .io
    # already construct a table with .ids.
    data.__file__ = absolute_filename
    return data
||
537 | |||
@classmethod
def from_url(cls, url):
    """
    Download a data file from the given URL and read it as a table.

    The data is downloaded to a temporary file whose name ends with the
    last component of the URL path (so that the extension is preserved),
    read with `from_file`, and the temporary file is removed afterwards,
    also when the download or the reading fails.

    :param url: address of the data file
    :type url: str
    :return: a new data table
    :rtype: Orange.data.Table
    """
    name = os.path.basename(urllib.parse.urlparse(url)[2])
    # Only the name is reserved here; the file is closed again right away
    # and kept on disk so that urlretrieve can write to it.
    with tempfile.NamedTemporaryFile(suffix=name, delete=False) as f:
        fname = f.name
    try:
        urllib.request.urlretrieve(url, fname)
        return cls.from_file(fname)
    finally:
        os.remove(fname)
||
548 | |||
# Helper function for __setitem__ and insert:
# Set the row of table data matrices
# noinspection PyProtectedMember
def _set_row(self, example, row):
    """
    Store the values of `example` into the given row of the table.

    - An `Instance` from the table's own domain is copied as is
      (attributes, class values and metas); the row's id is left
      unchanged.
    - An `Instance` from another domain is converted with the table's
      domain; the row takes over the instance's id, or gets a new one if
      that fails.
    - Any other sequence is interpreted as attribute values followed by
      class values, each converted with the variable's `to_val`; the
      metas of the row are set to the variables' `Unknown`.

    :param example: the data to store
    :param row: index of the row to overwrite
    """
    domain = self.domain
    if isinstance(example, Instance):
        if example.domain == domain:
            self.X[row] = example._x
            self._Y[row] = example._y
            self.metas[row] = example._metas
            return
        # The result is not used. NOTE(review): the call is kept in case
        # it is relied upon for its side effects (e.g. preparing or
        # validating the conversion); drop it if `convert` covers that.
        self.domain.get_conversion(example.domain)

        self.X[row], self._Y[row], self.metas[row] = \
            self.domain.convert(example)
        try:
            self.ids[row] = example.id
        except Exception:
            # Best effort: fall back to a fresh id, but let
            # KeyboardInterrupt and SystemExit propagate.
            self.ids[row] = type(self).new_id()

    else:
        self.X[row] = [var.to_val(val)
                       for var, val in zip(domain.attributes, example)]
        self._Y[row] = [var.to_val(val)
                        for var, val in
                        zip(domain.class_vars,
                            example[len(domain.attributes):])]
        self.metas[row] = np.array([var.Unknown for var in domain.metas],
                                   dtype=object)
||
584 | |||
def _check_all_dense(self):
    """Return True if none of X, Y and metas is stored as a sparse matrix.

    Each part counts as acceptable when its density is either
    ``Storage.DENSE`` or ``Storage.MISSING``.
    """
    acceptable = (Storage.DENSE, Storage.MISSING)
    densities = (self.X_density(), self.Y_density(), self.metas_density())
    return all(density in acceptable for density in densities)
||
589 | |||
590 | # A helper function for extend and insert |
||
591 | # Resize X, Y, metas and W. |
||
def _resize_all(self, new_length):
    """Resize X, Y, metas, W and ids in place to ``new_length`` rows.

    Helper for ``extend`` and ``insert``. If any resize fails, the arrays
    that were already resized are shrunk/grown back to the old length and
    the exception is re-raised.

    :param new_length: the new number of rows
    :raises ValueError: if the table contains sparse data
    """
    old_length = self.X.shape[0]
    if new_length == old_length:
        return
    if not self._check_all_dense():
        raise ValueError("Tables with sparse data cannot be resized")

    def resize_weights(length):
        # W is either a vector or a two-dimensional array with no columns
        if self.W.ndim == 2:
            self.W.resize((length, 0), refcheck=False)
        else:
            self.W.resize(length, refcheck=False)

    try:
        for arr in (self.X, self._Y, self.metas):
            arr.resize(new_length, arr.shape[1], refcheck=False)
        resize_weights(new_length)
        self.ids.resize(new_length, refcheck=False)
    except Exception:
        # roll back only those arrays that have already been resized
        for arr in (self.X, self._Y, self.metas):
            if arr.shape[0] == new_length:
                arr.resize(old_length, arr.shape[1], refcheck=False)
        if self.W.shape[0] == new_length:
            resize_weights(old_length)
        if self.ids.shape[0] == new_length:
            self.ids.resize(old_length, refcheck=False)
        raise
||
622 | |||
def __getitem__(self, key):
    """Return a row, a single value or a sub-table, depending on ``key``.

    * an integer gives a ``RowInstance`` for that row;
    * any other non-tuple key is passed to ``Table.from_table_rows``;
    * ``(row, column)`` with an integer row and a single column (name,
      index or variable) gives a ``Value``;
    * any other ``(rows, columns)`` pair gives a new table.

    :raises IndexError: if the key is a tuple whose length is not 2
    """
    if isinstance(key, Integral):
        return RowInstance(self, key)
    if not isinstance(key, tuple):
        return Table.from_table_rows(self, key)
    if len(key) != 2:
        raise IndexError("Table indices must be one- or two-dimensional")

    row_idx, col_idx = key
    if isinstance(row_idx, Integral):
        if not isinstance(col_idx, (str, Integral, Variable)):
            # single row, several columns: handled as a one-row table below
            row_idx = [row_idx]
        else:
            col_idx = self.domain.index(col_idx)
            var = self.domain[col_idx]
            n_attributes = len(self.domain.attributes)
            # negative indices address metas, indices past the attributes
            # address class variables
            if col_idx < 0:
                return Value(var, self.metas[row_idx, -1 - col_idx])
            if col_idx < n_attributes:
                return Value(var, self.X[row_idx, col_idx])
            return Value(var, self._Y[row_idx, col_idx - n_attributes])

    # multiple rows OR single row but multiple columns:
    # construct a new table
    attributes, col_indices = self.domain._compute_col_indices(col_idx)
    if attributes is None:
        domain = self.domain
    else:
        n_attrs = len(self.domain.attributes)
        r_attrs, r_classes, r_metas = [], [], []
        for i, col in enumerate(col_indices):
            if col < 0:
                r_metas.append(attributes[i])
            elif col < n_attrs:
                r_attrs.append(attributes[i])
            else:
                r_classes.append(attributes[i])
        domain = Domain(r_attrs, r_classes, r_metas)
    return self.__class__.from_table(domain, self, row_idx)
||
666 | |||
def __setitem__(self, key, value):
    """Assign to rows, single cells or blocks of a dense table.

    ``key`` is either a row index or a ``(rows, columns)`` pair. With a
    plain row index, a number is stored into all attributes of the row(s)
    and anything else is handed to ``_set_row``.

    :raises ValueError: if the table contains sparse data, or the number
        of values differs from the number of columns
    :raises IndexError: if the key is a tuple whose length is not 2
    :raises TypeError: if several columns are given a value that is not
        iterable, or attributes/classes are given a non-numeric value
    """
    if not self._check_all_dense():
        raise ValueError(
            "Assignment to rows of sparse data is not supported")
    if not isinstance(key, tuple):
        if isinstance(value, Real):
            self.X[key, :] = value
            return
        self._set_row(value, key)
        return

    if len(key) != 2:
        raise IndexError("Table indices must be one- or two-dimensional")
    row_idx, col_idx = key

    # single row
    if isinstance(row_idx, Integral):
        if isinstance(col_idx, slice):
            col_idx = range(*col_idx.indices(self.X.shape[1]))
        if not isinstance(col_idx, str) and isinstance(col_idx, Iterable):
            col_idx = list(col_idx)
        if not isinstance(col_idx, str) and isinstance(col_idx, Sized):
            if isinstance(value, (Sequence, np.ndarray)):
                values = value
            elif isinstance(value, Iterable):
                values = list(value)
            else:
                raise TypeError("Setting multiple values requires a "
                                "sequence or numpy array")
            if len(values) != len(col_idx):
                raise ValueError("Invalid number of values")
        else:
            col_idx, values = [col_idx], [value]
        for val, col in zip(values, col_idx):
            if not isinstance(val, Integral):
                val = self.domain[col].to_val(val)
            if not isinstance(col, Integral):
                col = self.domain.index(col)
            if col >= 0:
                if col < self.X.shape[1]:
                    self.X[row_idx, col] = val
                else:
                    self._Y[row_idx, col - self.X.shape[1]] = val
            else:
                self.metas[row_idx, -1 - col] = val
        # The row is fully assigned; without this return the code used to
        # fall through and write the last value again via the block path.
        return

    # multiple rows, multiple columns
    attributes, col_indices = self.domain._compute_col_indices(col_idx)
    if col_indices is ...:
        col_indices = range(len(self.domain))
    n_attrs = self.X.shape[1]
    if isinstance(value, str):
        if not attributes:
            attributes = self.domain.attributes
        for var, col in zip(attributes, col_indices):
            if 0 <= col < n_attrs:
                self.X[row_idx, col] = var.to_val(value)
            elif col >= n_attrs:
                self._Y[row_idx, col - n_attrs] = var.to_val(value)
            else:
                self.metas[row_idx, -1 - col] = var.to_val(value)
    else:
        attr_cols = np.fromiter(
            (col for col in col_indices if 0 <= col < n_attrs), int)
        class_cols = np.fromiter(
            (col - n_attrs for col in col_indices if col >= n_attrs), int)
        meta_cols = np.fromiter(
            (-1 - col for col in col_indices if col < 0), int)
        if value is None:
            value = Unknown

        if not isinstance(value, Real) and \
                (len(attr_cols) or len(class_cols)):
            raise TypeError(
                "Ordinary attributes can only have primitive values")
        if len(attr_cols):
            if len(attr_cols) == 1:
                # scipy.sparse matrices only allow primitive indices.
                attr_cols = attr_cols[0]
            self.X[row_idx, attr_cols] = value
        if len(class_cols):
            if len(class_cols) == 1:
                # scipy.sparse matrices only allow primitive indices.
                class_cols = class_cols[0]
            self._Y[row_idx, class_cols] = value
        if len(meta_cols):
            self.metas[row_idx, meta_cols] = value
||
754 | |||
def __delitem__(self, key):
    """Delete the rows selected by ``key`` from a dense table.

    ``key`` may be anything ``numpy.delete`` accepts as row selection, or
    ``...`` to delete all rows. The instance ids are deleted together with
    the data so that all per-row arrays keep the same length.

    :raises ValueError: if the table contains sparse data
    """
    if not self._check_all_dense():
        raise ValueError("Rows of sparse data cannot be deleted")
    if key is ...:
        key = range(len(self))
    self.X = np.delete(self.X, key, axis=0)
    self.Y = np.delete(self._Y, key, axis=0)
    self.metas = np.delete(self.metas, key, axis=0)
    self.W = np.delete(self.W, key, axis=0)
    self.ids = np.delete(self.ids, key, axis=0)
||
0 ignored issues
–
show
|
|||
764 | |||
def __len__(self):
    """Return the number of rows, taken from the first dimension of X."""
    n_rows = self.X.shape[0]
    return n_rows
||
767 | |||
def __str__(self):
    """Return all rows as strings, one per line, enclosed in brackets."""
    return "[" + ",\n ".join(str(ex) for ex in self) + "]"
||
770 | |||
def __repr__(self):
    """Return the repr of at most the first five rows.

    Rows are separated by newlines; an ellipsis line is added when the
    table has more than five rows.
    """
    parts = ["[", ",\n ".join(repr(ex) for ex in self[:5])]
    if len(self) > 5:
        parts.append(",\n ...")
    parts.append("\n]")
    return "".join(parts)
||
777 | |||
def clear(self):
    """Remove all rows from the table.

    :raises ValueError: if the table contains sparse data
    """
    if self._check_all_dense():
        del self[...]
        return
    raise ValueError("Tables with sparse data cannot be cleared")
||
783 | |||
def append(self, instance):
    """
    Add a data instance at the end of the table.

    :param instance: a data instance
    :type instance: Orange.data.Instance or a sequence of values
    """
    end = len(self)
    self.insert(end, instance)
||
792 | |||
def insert(self, row, instance):
    """
    Insert a data instance into the table at the given position.

    The table is grown by one row, the rows from ``row`` on are shifted
    down and the instance is stored with ``_set_row``. If storing fails,
    the shift and the resize are undone and the exception is re-raised.

    :param row: row index; negative indices count from the end
    :type row: int
    :param instance: a data instance
    :type instance: Orange.data.Instance or a sequence of values
    :raises IndexError: if the index is out of range
    """
    if row < 0:
        row += len(self)
    if not 0 <= row <= len(self):
        raise IndexError("Index out of range")
    self._resize_all(len(self) + 1)
    if row < len(self):
        # make room: move rows [row, end - 1) one position down
        for arr in (self.X, self._Y, self.metas, self.W, self.ids):
            arr[row + 1:] = arr[row:-1]
    try:
        self._set_row(instance, row)
        if self.W.shape[-1]:
            self.W[row] = 1
    except Exception:
        # undo the shift and drop the extra row
        for arr in (self.X, self._Y, self.metas, self.W, self.ids):
            arr[row:-1] = arr[row + 1:]
        self._resize_all(len(self) - 1)
        raise
||
825 | |||
def extend(self, instances):
    """
    Append the given instances to the table.

    The instances can be a table of the same or a different domain, or a
    sequence in which each item is an :obj:`~Orange.data.Instance` or a
    sequence of values (e.g. list, tuple, numpy.array). If anything fails,
    the table is resized back to its original length and the exception is
    re-raised.

    :param instances: additional instances
    :type instances: Orange.data.Table or a sequence of instances
    """
    old_length = len(self)
    self._resize_all(old_length + len(instances))
    try:
        same_domain = (isinstance(instances, Table) and
                       instances.domain == self.domain)
        if not same_domain:
            for offset, example in enumerate(instances):
                row = old_length + offset
                self[row] = example
                try:
                    self.ids[row] = example.id
                except AttributeError:
                    self.ids[row] = self.new_id()
        else:
            # shortcut: copy the arrays directly
            tail = slice(old_length, None)
            self.X[tail] = instances.X
            self._Y[tail] = instances._Y
            self.metas[tail] = instances.metas
            if self.W.shape[-1]:
                if instances.W.shape[-1]:
                    self.W[tail] = instances.W
                else:
                    self.W[tail] = 1
            self.ids[tail] = instances.ids
    except Exception:
        self._resize_all(old_length)
        raise
||
861 | |||
@staticmethod
def concatenate(tables, axis=1):
    """Return concatenation of `tables` by `axis`.

    With ``axis=0`` a copy of the first table is extended with the rows of
    the others. With ``axis=1`` the columns of all tables are joined into a
    table over a new domain; this requires that no variable name (among
    variables and metas) appears in more than one table.

    :param tables: a non-empty sequence of tables
    :param axis: 0 to concatenate rows, 1 to concatenate columns
    :raises ValueError: if `tables` is empty, if two tables share a
        variable name when concatenating columns, or if `axis` is not 0 or 1
    """
    if not tables:
        raise ValueError('need at least one table to concatenate')
    if len(tables) == 1:
        return tables[0].copy()
    CONCAT_ROWS, CONCAT_COLS = 0, 1
    if axis == CONCAT_ROWS:
        table = tables[0].copy()
        for t in tables[1:]:
            table.extend(t)
        return table
    elif axis == CONCAT_COLS:
        # Compare each table with all names seen so far; intersecting the
        # name sets of *all* tables would miss a clash between only some.
        seen_names = set()
        for t in tables:
            names = {var.name
                     for var in chain(t.domain.variables, t.domain.metas)}
            if seen_names & names:
                raise ValueError('Concatenating two domains with variables '
                                 'with same name is undefined')
            seen_names |= names
        domain = Domain(flatten(t.domain.attributes for t in tables),
                        flatten(t.domain.class_vars for t in tables),
                        flatten(t.domain.metas for t in tables))

        def ndmin(A):
            # hstack needs two-dimensional blocks
            return A if A.ndim > 1 else A.reshape(A.shape[0], 1)

        table = Table.from_numpy(domain,
                                 np.hstack(tuple(ndmin(t.X) for t in tables)),
                                 np.hstack(tuple(ndmin(t.Y) for t in tables)),
                                 np.hstack(tuple(ndmin(t.metas) for t in tables)),
                                 np.hstack(tuple(ndmin(t.W) for t in tables)))
        return table
    raise ValueError('axis {} out of bounds [0, 2)'.format(axis))
||
898 | |||
def is_view(self):
    """
    Return `True` if all arrays represent a view referring to another table

    An array with no columns is ignored; any other array must have a
    non-``None`` ``base``.
    """
    # The weights are read through ``self.W`` like everywhere else in the
    # class; the former ``self._weights`` is not set by any visible code.
    return ((not self.X.shape[-1] or self.X.base is not None) and
            (not self._Y.shape[-1] or self._Y.base is not None) and
            (not self.metas.shape[-1] or self.metas.base is not None) and
            (not self.W.shape[-1] or self.W.base is not None))
||
907 | |||
def is_copy(self):
    """
    Return `True` if the table owns its data

    X is ignored when it has no columns; Y, metas and W must always have
    ``base`` equal to ``None``.
    """
    if self.X.shape[-1] and self.X.base is not None:
        return False
    if self._Y.base is not None:
        return False
    if self.metas.base is not None:
        return False
    return self.W.base is None
||
916 | |||
def ensure_copy(self):
    """
    Make sure the table owns its data.

    Each of X, Y, metas and W whose ``base`` is not ``None`` is replaced
    by a copy of itself.
    """
    for name in ("X", "_Y", "metas", "W"):
        arr = getattr(self, name)
        if arr.base is not None:
            setattr(self, name, arr.copy())
||
0 ignored issues
–
show
|
|||
929 | |||
def copy(self):
    """
    Return a copy of the table

    A new ``Table`` is constructed from this one and then made to own its
    data with ``ensure_copy``.
    """
    duplicate = Table(self)
    duplicate.ensure_copy()
    return duplicate
||
937 | |||
@staticmethod
def __determine_density(data):
    """Classify the storage of ``data`` as one of the ``Storage`` constants.

    :param data: a numpy array, a scipy sparse matrix or ``None``
    :return: ``Storage.MISSING`` for ``None``, ``Storage.DENSE`` for
        non-sparse data, ``Storage.SPARSE_BOOL`` or ``Storage.SPARSE``
        for sparse matrices
    """
    if data is None:
        return Storage.MISSING
    if not sp.issparse(data):
        return Storage.DENSE
    try:
        # No zeros among the stored values when counting values up to 1.
        # NOTE(review): presumably bincount raises ValueError for stored
        # values outside that range, which marks the matrix as
        # non-boolean - confirm against bottlechest.
        if bn.bincount(data.data, 1)[0][0] == 0:
            return Storage.SPARSE_BOOL
    except ValueError:
        pass
    return Storage.SPARSE
||
951 | |||
def X_density(self):
    """Return the storage type of X; computed on first call, then cached
    in ``_X_density``."""
    try:
        return self._X_density
    except AttributeError:
        pass
    self._X_density = Table.__determine_density(self.X)
    return self._X_density
||
956 | |||
def Y_density(self):
    """Return the storage type of Y; computed on first call, then cached
    in ``_Y_density``."""
    try:
        return self._Y_density
    except AttributeError:
        pass
    self._Y_density = Table.__determine_density(self._Y)
    return self._Y_density
||
961 | |||
def metas_density(self):
    """Return the storage type of metas; computed on first call, then
    cached in ``_metas_density``."""
    try:
        return self._metas_density
    except AttributeError:
        pass
    self._metas_density = Table.__determine_density(self.metas)
    return self._metas_density
||
966 | |||
def set_weights(self, weight=1):
    """
    Assign ``weight`` to all data instances.

    If the table has no weights yet (the last dimension of W is empty),
    a new vector with one element per row is allocated first.
    """
    has_vector = bool(self.W.shape[-1])
    if not has_vector:
        self.W = np.empty(len(self))
    self.W[:] = weight
||
974 | |||
def has_weights(self):
    """Return `True` if the data instances are weighed. """
    n_weights = self.W.shape[-1]
    return n_weights != 0
||
978 | |||
def total_weight(self):
    """
    Return the sum of instance weights, or the number of instances if the
    table is unweighted.
    """
    return sum(self.W) if self.W.shape[-1] else len(self)
||
987 | |||
def has_missing(self):
    """Return `True` if there are any missing attribute or class values."""
    if bn.anynan(self.X):
        # mirror ``or``: hand back the first truthy result as it is
        return bn.anynan(self.X)
    return bn.anynan(self._Y)
||
991 | |||
def has_missing_class(self):
    """Return `True` if there are any missing class values."""
    class_has_nan = bn.anynan(self._Y)
    return class_has_nan
||
995 | |||
def checksum(self, include_metas=True):
    """Return a checksum over X, Y, metas and W."""
    # TODO: zlib.adler32 does not work for numpy arrays with dtype object
    # (after pickling and unpickling such arrays, checksum changes)
    # Why, and should we fix it or remove it?
    parts = [self.X, self._Y]
    if include_metas:
        parts.append(self.metas)
    parts.append(self.W)
    # chain the Adler-32 checksums: each call continues from the previous
    cs = zlib.adler32(np.ascontiguousarray(parts[0]))
    for arr in parts[1:]:
        cs = zlib.adler32(np.ascontiguousarray(arr), cs)
    return cs
||
1007 | |||
def shuffle(self):
    """Randomly shuffle the rows of the table.

    X, Y, metas, W and the instance ids are all reordered with the same
    random permutation, so every row keeps its values, weight and id.

    :raises ValueError: if the table contains sparse data
    """
    if not self._check_all_dense():
        raise ValueError("Rows of sparse data cannot be shuffled")
    ind = np.arange(self.X.shape[0])
    np.random.shuffle(ind)
    self.X = self.X[ind]
    self._Y = self._Y[ind]
    self.metas = self.metas[ind]
    self.W = self.W[ind]
    # ids identify the instances and must travel with their rows
    self.ids = self.ids[ind]
||
0 ignored issues
–
show
|
|||
1018 | |||
def get_column_view(self, index):
    """
    Return a column of the table as a vector, together with a bool flag
    telling whether the column comes from a sparse matrix. Dense columns
    are returned as sliced from the array; sparse columns are converted
    to a dense vector. Note that vertical slicing of sparse matrices is
    inefficient.

    :param index: the index of the column
    :type index: int, str or Orange.data.Variable
    :return: (one-dimensional numpy array, sparse)
    """

    def rx(M):
        if not sp.issparse(M):
            return M, False
        return np.asarray(M.todense())[:, 0], True

    if not isinstance(index, Integral):
        index = self.domain.index(index)
    # negative indices address metas, indices past X address Y
    if index < 0:
        return rx(self.metas[:, -1 - index])
    n_attributes = self.X.shape[1]
    if index < n_attributes:
        return rx(self.X[:, index])
    return rx(self._Y[:, index - n_attributes])
||
1045 | |||
def _filter_is_defined(self, columns=None, negate=False):
    """Return a table with the rows that have no missing values.

    :param columns: columns to check; if ``None``, all of X and Y are
        checked
    :param negate: if true, return the rows that do have missing values
    :return: a new table built with ``Table.from_table_rows``
    """
    def incomplete_rows(M):
        if sp.issparse(M):
            # A row is complete when it stores a value for every column,
            # i.e. consecutive row pointers differ by the column count.
            # NOTE(review): assumes row-compressed (CSR) storage - confirm.
            return M.indptr[1:] != M.indptr[:-1] + M.shape[1]
        return bn.anynan(M, axis=1)

    if columns is None:
        remove = np.logical_or(incomplete_rows(self.X),
                               incomplete_rows(self._Y))
    else:
        remove = np.zeros(len(self), dtype=bool)
        for column in columns:
            col, sparse = self.get_column_view(column)
            if sparse:
                remove = np.logical_or(remove, col == 0)
            else:
                remove = np.logical_or(remove, bn.anynan([col], axis=0))
    retain = remove if negate else np.logical_not(remove)
    return Table.from_table_rows(self, retain)
||
1068 | |||
def _filter_has_class(self, negate=False):
    """Return a table with the rows in which no class value is missing.

    :param negate: if true, return the rows with a missing class value
    :return: a new table built with ``Table.from_table_rows``
    """
    if sp.issparse(self._Y):
        # Stored values per row, from consecutive row pointers.
        # NOTE(review): assumes row-compressed (CSR) storage - confirm.
        stored = self._Y.indptr[1:] - self._Y.indptr[:-1]
        if negate:
            retain = stored != self._Y.shape[1]
        else:
            retain = stored == self._Y.shape[1]
    else:
        retain = bn.anynan(self._Y, axis=1)
        if not negate:
            retain = np.logical_not(retain)
    return Table.from_table_rows(self, retain)
||
1082 | |||
def _filter_same_value(self, column, value, negate=False):
    """Return a table with the rows whose ``column`` equals ``value``.

    Values that are not real numbers are first converted with the
    variable's ``to_val``. With ``negate`` the selection is inverted.
    """
    if not isinstance(value, Real):
        value = self.domain[column].to_val(value)
    column_data = self.get_column_view(column)[0]
    matches = column_data == value
    selection = np.logical_not(matches) if negate else matches
    return Table.from_table_rows(self, selection)
||
1090 | |||
def _filter_values(self, filter):
    """Return a table with the rows selected by a value filter.

    ``filter`` is either a ``Values`` filter, whose conditions are
    combined as a conjunction or a disjunction according to its
    ``conjunction`` flag, or a single condition. The selection is inverted
    if ``filter.negate`` is set.

    :raises TypeError: for an unknown condition type or operator
    """
    from Orange.data import filter as data_filter

    if isinstance(filter, data_filter.Values):
        conditions = filter.conditions
        conjunction = filter.conjunction
    else:
        conditions = [filter]
        conjunction = True
    # neutral element: all rows for "and", no rows for "or"
    if conjunction:
        sel = np.ones(len(self), dtype=bool)
    else:
        sel = np.zeros(len(self), dtype=bool)

    for f in conditions:
        col = self.get_column_view(f.column)[0]
        if isinstance(f, data_filter.FilterDiscrete) and f.values is None \
                or isinstance(f, data_filter.FilterContinuous) and \
                f.oper == f.IsDefined:
            if conjunction:
                sel *= ~np.isnan(col)
            else:
                sel += ~np.isnan(col)
        elif isinstance(f, data_filter.FilterString) and \
                f.oper == f.IsDefined:
            if conjunction:
                sel *= (col != "")
            else:
                sel += (col != "")
        elif isinstance(f, data_filter.FilterDiscrete):
            if conjunction:
                s2 = np.zeros(len(self), dtype=bool)
                for val in f.values:
                    if not isinstance(val, Real):
                        val = self.domain[f.column].to_val(val)
                    s2 += (col == val)
                sel *= s2
            else:
                for val in f.values:
                    if not isinstance(val, Real):
                        val = self.domain[f.column].to_val(val)
                    sel += (col == val)
        elif isinstance(f, data_filter.FilterStringList):
            if not f.case_sensitive:
                # noinspection PyTypeChecker
                col = np.char.lower(np.array(col, dtype=str))
                vals = [val.lower() for val in f.values]
            else:
                vals = f.values
            if conjunction:
                sel *= reduce(operator.add,
                              (col == val for val in vals))
            else:
                sel = reduce(operator.add,
                             (col == val for val in vals), sel)
        elif isinstance(f, data_filter.FilterRegex):
            # Combine with the rows selected so far instead of replacing
            # them; otypes lets vectorize cope with an empty column.
            matches = np.vectorize(f, otypes=[bool])(col)
            if conjunction:
                sel *= matches
            else:
                sel += matches
        elif isinstance(f, (data_filter.FilterContinuous,
                            data_filter.FilterString)):
            if (isinstance(f, data_filter.FilterString) and
                    not f.case_sensitive):
                # noinspection PyTypeChecker
                col = np.char.lower(np.array(col, dtype=str))
                fmin = f.min.lower()
                if f.oper in [f.Between, f.Outside]:
                    fmax = f.max.lower()
            else:
                fmin, fmax = f.min, f.max
            if f.oper == f.Equal:
                col = (col == fmin)
            elif f.oper == f.NotEqual:
                col = (col != fmin)
            elif f.oper == f.Less:
                col = (col < fmin)
            elif f.oper == f.LessEqual:
                col = (col <= fmin)
            elif f.oper == f.Greater:
                col = (col > fmin)
            elif f.oper == f.GreaterEqual:
                col = (col >= fmin)
            elif f.oper == f.Between:
                col = (col >= fmin) * (col <= fmax)
            elif f.oper == f.Outside:
                col = (col < fmin) + (col > fmax)
            elif not isinstance(f, data_filter.FilterString):
                raise TypeError("Invalid operator")
            elif f.oper == f.Contains:
                col = np.fromiter((fmin in e for e in col),
                                  dtype=bool)
            elif f.oper == f.StartsWith:
                col = np.fromiter((e.startswith(fmin) for e in col),
                                  dtype=bool)
            elif f.oper == f.EndsWith:
                col = np.fromiter((e.endswith(fmin) for e in col),
                                  dtype=bool)
            else:
                raise TypeError("Invalid operator")
            if conjunction:
                sel *= col
            else:
                sel += col
        else:
            raise TypeError("Invalid filter")

    if filter.negate:
        sel = ~sel
    return Table.from_table_rows(self, sel)
||
1198 | |||
def _compute_basic_stats(self, columns=None,
                         include_metas=False, compute_variance=False):
    """Compute basic statistics with ``bn.stats`` for the given columns.

    Without ``columns``, the statistics for attributes, class variables
    and (if ``include_metas``) metas are stacked into a single array; an
    empty list is returned if there is nothing to compute. With
    ``columns``, a list with one row of statistics per column is returned.

    :raises NotImplementedError: if ``compute_variance`` is true
    """
    if compute_variance:
        raise NotImplementedError("computation of variance is "
                                  "not implemented yet")
    W = self.W if self.has_weights() else None

    if not columns:
        parts = []
        if self.domain.attributes:
            parts.append(bn.stats(self.X, W))
        if self.domain.class_vars:
            parts.append(bn.stats(self._Y, W))
        if include_metas and self.domain.metas:
            parts.append(bn.stats(self.metas, W))
        return np.vstack(tuple(parts)) if len(parts) else []

    columns = [self.domain.index(c) for c in columns]
    nattrs = len(self.domain.attributes)
    # statistics are computed only for the parts that are actually needed
    Xs = any(0 <= c < nattrs for c in columns) and bn.stats(self.X, W)
    Ys = any(c >= nattrs for c in columns) and bn.stats(self._Y, W)
    ms = any(c < 0 for c in columns) and bn.stats(self.metas, W)
    stats = []
    for column in columns:
        if column < 0:
            stats.append(ms[-1 - column])
        elif column < nattrs:
            stats.append(Xs[column, :])
        else:
            stats.append(Ys[column - nattrs, :])
    return stats
||
1230 | |||
def _compute_distributions(self, columns=None):
    """Compute value distributions for the given columns.

    :param columns: columns (anything ``domain.index`` accepts); if
        ``None``, all ``domain.variables`` are used
    :return: a list of ``(dist, unknowns)`` pairs, one per column
    """
    def _get_matrix(M, cachedM, col):
        """Return (column data, weights or None, CSC cache of M)."""
        if not sp.issparse(M):
            return M[:, col], self.W if self.has_weights() else None, None
        if cachedM is None:
            if single_column:
                # warn() takes the message first and the category second
                warn("computing distributions on sparse data "
                     "for a single column is inefficient",
                     ResourceWarning)
            # convert the matrix that was asked for, which may be Y
            cachedM = sp.csc_matrix(M)
        data = cachedM.data[cachedM.indptr[col]:cachedM.indptr[col + 1]]
        if self.has_weights():
            weights = self.W[
                cachedM.indices[cachedM.indptr[col]:cachedM.indptr[col + 1]]]
        else:
            weights = None
        return data, weights, cachedM

    if columns is None:
        columns = range(len(self.domain.variables))
        single_column = False
    else:
        columns = [self.domain.index(var) for var in columns]
        single_column = len(columns) == 1 and len(self.domain) > 1
    distributions = []
    Xcsc = Ycsc = None
    for col in columns:
        var = self.domain[col]
        if col < self.X.shape[1]:
            m, W, Xcsc = _get_matrix(self.X, Xcsc, col)
        else:
            m, W, Ycsc = _get_matrix(self._Y, Ycsc, col - self.X.shape[1])
        if var.is_discrete:
            if W is not None:
                W = W.ravel()
            dist, unknowns = bn.bincount(m, len(var.values) - 1, W)
        elif not len(m):
            dist, unknowns = np.zeros((2, 0)), 0
        else:
            if W is not None:
                ranks = np.argsort(m)
                vals = np.vstack((m[ranks], W[ranks].flatten()))
                unknowns = bn.countnans(m, W)
            else:
                # first row: sorted values, second row: unit weights
                vals = np.ones((2, m.shape[0]))
                vals[0, :] = m
                vals[0, :].sort()
                unknowns = bn.countnans(m)
            dist = np.array(_valuecount.valuecount(vals))
        distributions.append((dist, unknowns))

    return distributions
||
1284 | |||
def _compute_contingency(self, col_vars=None, row_var=None):
    """
    Compute contingencies of the given columns against a discrete row
    variable.

    Parameters
    ----------
    col_vars : iterable or None
        Variables (anything accepted by `self.domain.index`) for which
        contingencies are computed. If None, all of
        `self.domain.variables` are used.
    row_var : variable descriptor, name, index or None
        The variable whose values define the rows of each contingency.
        If None, `self.domain.class_var` is used.

    Returns
    -------
    (contingencies, unknown_rows)
        `contingencies` is a list with one entry per column variable, in
        the order of `col_vars`. For discrete columns the entry is what
        `bn.contingency` returns (for sparse storage, the pair
        `(conts[i], nans[i])` extracted from its result); for continuous
        columns it is `([U, C], unknown)` as returned by
        `_contingency.contingency_floatarray`.
        `unknown_rows` is the number of instances with an unknown row
        value, or the sum of their weights if the table has weights.

    Raises
    ------
    ValueError
        If no row variable is given and the domain has no class variable,
        or if any column variable is neither discrete nor continuous.
    TypeError
        If the row variable is not discrete.
    """
    n_atts = self.X.shape[1]

    if col_vars is None:
        col_vars = range(len(self.domain.variables))
    else:
        col_vars = [self.domain.index(var) for var in col_vars]
    if row_var is None:
        row_var = self.domain.class_var
        if row_var is None:
            raise ValueError("No row variable")

    row_desc = self.domain[row_var]
    if not row_desc.is_discrete:
        raise TypeError("Row variable must be discrete")
    row_indi = self.domain.index(row_var)
    n_rows = len(row_desc.values)
    # Domain indices: [0, n_atts) -> X, negative -> metas, the rest -> Y.
    if 0 <= row_indi < n_atts:
        row_data = self.X[:, row_indi]
    elif row_indi < 0:
        row_data = self.metas[:, -1 - row_indi]
    else:
        row_data = self._Y[:, row_indi - n_atts]

    W = self.W if self.has_weights() else None
    nan_inds = None

    col_desc = [self.domain[var] for var in col_vars]
    col_indi = [self.domain.index(var) for var in col_vars]

    if any(not (var.is_discrete or var.is_continuous)
           for var in col_desc):
        raise ValueError("contingency can be computed only for discrete "
                         "and continuous values")

    # meta attributes can be stored as type object
    if row_data.dtype.kind != "f":
        row_data = row_data.astype(float)

    unknown_rows = bn.countnans(row_data)
    if unknown_rows:
        nan_inds = np.isnan(row_data)
        row_data = row_data[~nan_inds]
        # `W` is an array, so test against None (its truth value is
        # ambiguous), and sum the weights of the dropped rows *before*
        # filtering `W`; afterwards the mask no longer matches its length.
        if W is not None:
            unknown_rows = np.sum(W[nan_inds])
            W = W[~nan_inds]

    contingencies = [None] * len(col_desc)
    # Each storage array is paired with a predicate selecting the domain
    # indices that live in it and a mapping from domain index to column.
    for arr, f_cond, f_ind in (
            (self.X, lambda i: 0 <= i < n_atts, lambda i: i),
            (self._Y, lambda i: i >= n_atts, lambda i: i - n_atts),
            (self.metas, lambda i: i < 0, lambda i: -1 - i)):

        if nan_inds is not None:
            arr = arr[~nan_inds]

        arr_indi = [e for e, ind in enumerate(col_indi) if f_cond(ind)]

        # (position in result, column within `arr`, variable descriptor)
        arr_vars = [(e, f_ind(col_indi[e]), col_desc[e]) for e in arr_indi]
        disc_vars = [v for v in arr_vars if v[2].is_discrete]
        if disc_vars:
            if sp.issparse(arr):
                # Sparse storage: a single call over all masked columns.
                max_vals = max(len(v[2].values) for v in disc_vars)
                disc_indi = {i for _, i, _ in disc_vars}
                mask = [i in disc_indi for i in range(arr.shape[1])]
                conts, nans = bn.contingency(arr, row_data, max_vals - 1,
                                             n_rows - 1, W, mask)
                for col_i, arr_i, _ in disc_vars:
                    contingencies[col_i] = (conts[arr_i], nans[arr_i])
            else:
                for col_i, arr_i, var in disc_vars:
                    contingencies[col_i] = bn.contingency(
                        arr[:, arr_i], row_data,
                        len(var.values) - 1, n_rows - 1, W)

        cont_vars = [v for v in arr_vars if v[2].is_continuous]
        if cont_vars:

            classes = row_data.astype(dtype=np.int8)
            if W is not None:
                W = W.astype(dtype=np.float64)
            if sp.issparse(arr):
                # CSC layout gives direct access to each column's entries.
                arr = sp.csc_matrix(arr)

            for col_i, arr_i, _ in cont_vars:
                if sp.issparse(arr):
                    col_data = arr.data[arr.indptr[arr_i]:
                                        arr.indptr[arr_i + 1]]
                    rows = arr.indices[arr.indptr[arr_i]:
                                       arr.indptr[arr_i + 1]]
                    W_ = None if W is None else W[rows]
                    classes_ = classes[rows]
                else:
                    col_data, W_, classes_ = arr[:, arr_i], W, classes

                col_data = col_data.astype(dtype=np.float64)
                U, C, unknown = _contingency.contingency_floatarray(
                    col_data, classes_, n_rows, W_)
                contingencies[col_i] = ([U, C], unknown)

    return contingencies, unknown_rows
||
1386 | |||
1387 | |||
1388 | def _check_arrays(*arrays, dtype=None): |
||
1389 | checked = [] |
||
1390 | if not len(arrays): |
||
1391 | return checked |
||
1392 | |||
1393 | def ninstances(array): |
||
1394 | if hasattr(array, "shape"): |
||
1395 | return array.shape[0] |
||
1396 | else: |
||
1397 | return len(array) if array is not None else 0 |
||
1398 | |||
1399 | shape_1 = ninstances(arrays[0]) |
||
1400 | |||
1401 | for array in arrays: |
||
1402 | if array is None: |
||
1403 | checked.append(array) |
||
1404 | continue |
||
1405 | |||
1406 | if ninstances(array) != shape_1: |
||
1407 | raise ValueError("Leading dimension mismatch (%d != %d)" |
||
1408 | % (len(array), shape_1)) |
||
1409 | |||
1410 | if sp.issparse(array): |
||
1411 | array.data = np.asarray(array.data) |
||
1412 | has_inf = _check_inf(array.data) |
||
1413 | else: |
||
1414 | if dtype is not None: |
||
1415 | array = np.asarray(array, dtype=dtype) |
||
1416 | else: |
||
1417 | array = np.asarray(array) |
||
1418 | has_inf = _check_inf(array) |
||
1419 | |||
1420 | if has_inf: |
||
1421 | raise ValueError("Array contains infinity.") |
||
1422 | checked.append(array) |
||
1423 | |||
1424 | return checked |
||
1425 | |||
1426 | |||
def _check_inf(array):
    """
    Tell whether `array` contains an infinite value.

    Arrays whose dtype code is not listed in `np.typecodes['AllFloat']`
    are reported as free of infinities without inspecting their values.
    """
    if array.dtype.char not in np.typecodes['AllFloat']:
        return False
    return np.isinf(array.data).any()
||
1430 | |||
1431 | |||
def _subarray(arr, rows, cols):
    """Return the part of `arr` selected by the `rows` x `cols` cross product."""
    index = _rxc_ix(rows, cols)
    return arr[index]
||
1434 | |||
1435 | |||
def _rxc_ix(rows, cols):
    """
    Construct an index object to index the `rows` x `cols` cross product.

    Rows and columns can be a 1d bool or int sequence, a slice or an
    Ellipsis (`...`). The latter is a convenience and is interpreted the same
    as `slice(None, None, 1)`, i.e. it selects everything along that axis.

    Parameters
    ----------
    rows : 1D sequence, slice or Ellipsis
        Row indices.
    cols : 1D sequence, slice or Ellipsis
        Column indices.

    Returns
    -------
    tuple
        A `(row_index, col_index)` pair usable as `arr[row_index, col_index]`.
        Slices are returned as given; sequences are converted to integer
        index arrays (boolean sequences become the positions of their True
        entries, as done by `numpy.ix_`).

    See Also
    --------
    numpy.ix_

    Examples
    --------
    >>> a = np.arange(10).reshape(2, 5)
    >>> a[_rxc_ix([0, 1], [3, 4])]
    array([[3, 4],
           [8, 9]])
    >>> a[_rxc_ix([False, True], ...)]
    array([[5, 6, 7, 8, 9]])

    """
    if rows is ...:
        rows = slice(None, None, 1)
    if cols is ...:
        cols = slice(None, None, 1)

    rows_is_slice = isinstance(rows, slice)
    cols_is_slice = isinstance(cols, slice)

    if rows_is_slice and cols_is_slice:
        return rows, cols
    if rows_is_slice:
        # A slice combined with a flat index array already addresses the
        # cross product, so no open mesh is needed.
        return rows, np.asarray(np.ix_(cols), int).ravel()
    if cols_is_slice:
        return np.asarray(np.ix_(rows), int).ravel(), cols

    r, c = np.ix_(rows, cols)
    return np.asarray(r, int), np.asarray(c, int)
||
1478 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.