GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

Issues (4082)

Orange/data/table.py (61 issues)

1
import os
2
import zlib
3
from collections import MutableSequence, Iterable, Sequence, Sized
4
from itertools import chain
5
from numbers import Real, Integral
6
import operator
7
from functools import reduce
8
from warnings import warn
9
from threading import Lock
10
import tempfile
11
import urllib.parse
12
import urllib.request
13
14
import bottlechest as bn
0 ignored issues
show
The import bottlechest could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run Pylint; when installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place an __init__.py file in each sub-folder.

Loading history...
15
from scipy import sparse as sp
0 ignored issues
show
The import scipy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run Pylint; when installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place an __init__.py file in each sub-folder.

Loading history...
16
17
from .instance import *
0 ignored issues
show
The usage of wildcard imports like instance should generally be avoided.
Loading history...
isnan was imported with wildcard, but is not used.
Loading history...
18
from Orange.util import flatten
19
from Orange.data import (Domain, io, Variable, StringVariable)
20
from Orange.data.storage import Storage
21
from . import _contingency
0 ignored issues
show
The name _contingency does not seem to exist in module Orange.data.
Loading history...
22
from . import _valuecount
0 ignored issues
show
The name _valuecount does not seem to exist in module Orange.data.
Loading history...
23
24
25
def get_sample_datasets_dir():
26
    orange_data_table = os.path.dirname(__file__)
27
    dataset_dir = os.path.join(orange_data_table, '..', 'datasets')
28
    return os.path.realpath(dataset_dir)
29
30
31
dataset_dirs = ['', get_sample_datasets_dir()]
32
33
34
class RowInstance(Instance):
35
    sparse_x = None
36
    sparse_y = None
37
    sparse_metas = None
38
    _weight = None
39
40
    def __init__(self, table, row_index):
0 ignored issues
show
The __init__ method of the super-class Instance is not called.

It is generally advisable to initialize the super-class by calling its __init__ method:

class SomeParent:
    def __init__(self):
        self.x = 1

class SomeChild(SomeParent):
    def __init__(self):
        # Initialize the super class
        SomeParent.__init__(self)
Loading history...
41
        """
42
        Construct a data instance representing the given row of the table.
43
        """
44
        self.table = table
45
        self._domain = table.domain
46
        self.row_index = row_index
47
        self.id = table.ids[row_index]
48
        self._x = table.X[row_index]
49
        if sp.issparse(self._x):
50
            self.sparse_x = self._x
51
            self._x = np.asarray(self._x.todense())[0]
52
        self._y = table._Y[row_index]
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
53
        if sp.issparse(self._y):
54
            self.sparse_y = self._y
55
            self._y = np.asarray(self._y.todense())[0]
56
        self._metas = table.metas[row_index]
57
        if sp.issparse(self._metas):
58
            self.sparse_metas = self._metas
59
            self._metas = np.asarray(self._metas.todense())[0]
60
61
    @property
62
    def weight(self):
63
        if not self.table.has_weights():
64
            return 1
65
        return self.table.W[self.row_index]
66
67
    @weight.setter
68
    def weight(self, weight):
0 ignored issues
show
Arguments number differs from overridden 'weight' method
Loading history...
69
        if not self.table.has_weights():
70
            self.table.set_weights()
71
        self.table.W[self.row_index] = weight
72
73
    def set_class(self, value):
74
        self._check_single_class()
75
        if not isinstance(value, Real):
76
            value = self.table.domain.class_var.to_val(value)
77
        self._y[0] = value
78
        if self.sparse_y:
79
            self.table._Y[self.row_index, 0] = value
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
80
81
    def __setitem__(self, key, value):
82
        if not isinstance(key, Integral):
83
            key = self._domain.index(key)
84
        if isinstance(value, str):
85
            var = self._domain[key]
86
            value = var.to_val(value)
87
        if key >= 0:
88
            if not isinstance(value, Real):
89
                raise TypeError("Expected primitive value, got '%s'" %
90
                                type(value).__name__)
91
            if key < len(self._x):
92
                self._x[key] = value
93
                if self.sparse_x:
94
                    self.table.X[self.row_index, key] = value
95
            else:
96
                self._y[key - len(self._x)] = value
97
                if self.sparse_y:
98
                    self.table._Y[self.row_index, key - len(self._x)] = value
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
99
        else:
100
            self._metas[-1 - key] = value
101
            if self.sparse_metas:
102
                self.table.metas[self.row_index, -1 - key] = value
103
104
    def _str(self, limit):
105
        def sp_values(matrix, variables):
106
            if not sp.issparse(matrix):
107
                return Instance.str_values(matrix[row], variables, limit)
108
            begptr, endptr = matrix.indptr[row:row + 2]
109
            rendptr = endptr if not limit else min(endptr, begptr + 5)
110
            variables = [variables[var]
111
                         for var in matrix.indices[begptr:rendptr]]
112
            s = ", ".join(
113
                "{}={}".format(var.name, var.str_val(val))
114
                for var, val in zip(variables, matrix.data[begptr:rendptr]))
115
            if limit and rendptr != endptr:
116
                s += ", ..."
117
            return s
118
119
        table = self.table
120
        domain = table.domain
121
        row = self.row_index
122
        s = "[" + sp_values(table.X, domain.attributes)
123
        if domain.class_vars:
124
            s += " | " + sp_values(table._Y, domain.class_vars)
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
125
        s += "]"
126
        if self._domain.metas:
127
            s += " {" + sp_values(table.metas, domain.metas) + "}"
128
        return s
129
130
    def __str__(self):
131
        return self._str(False)
132
133
    def __repr__(self):
134
        return self._str(True)
135
136
137
class Columns:
138
    def __init__(self, domain):
139
        for v in chain(domain, domain.metas):
140
            setattr(self, v.name.replace(" ", "_"), v)
141
142
143
# noinspection PyPep8Naming
144
class Table(MutableSequence, Storage):
145
    __file__ = None
146
147
    @property
148
    def columns(self):
149
        """
150
        A class whose attributes contain attribute descriptors for columns.
151
        For a table `table`, setting `c = table.columns` will allow accessing
152
        the table's variables with, for instance `c.gender`, `c.age` ets.
153
        Spaces are replaced with underscores.
154
        """
155
        return Columns(self.domain)
156
157
    _next_instance_id = 0
158
    _next_instance_lock = Lock()
159
160
    @property
161
    def Y(self):
162
        if self._Y.shape[1] == 1:
163
            return self._Y[:, 0]
164
        return self._Y
165
166
    @Y.setter
167
    def Y(self, value):
168
        if len(value.shape) == 1:
169
            value = value[:, None]
170
        self._Y = value
0 ignored issues
show
The attribute _Y was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
171
172
    def __new__(cls, *args, **kwargs):
173
        if not args and not kwargs:
174
            return super().__new__(cls)
175
176
        if 'filename' in kwargs:
177
            args = [kwargs.pop('filename')]
178
179
        if not args:
180
            raise TypeError(
181
                "Table takes at least 1 positional argument (0 given))")
182
183
        if isinstance(args[0], str):
184
            if args[0].startswith('https://') or args[0].startswith('http://'):
185
                return cls.from_url(args[0], **kwargs)
186
            else:
187
                return cls.from_file(args[0], **kwargs)
188
        elif isinstance(args[0], Table):
189
            return cls.from_table(args[0].domain, args[0])
190
        elif isinstance(args[0], Domain):
191
            domain, args = args[0], args[1:]
192
            if not args:
193
                return cls.from_domain(domain, **kwargs)
194
            if isinstance(args[0], Table):
195
                return cls.from_table(domain, *args)
196
            elif isinstance(args[0], list):
197
                return cls.from_list(domain, *args)
198
        else:
199
            domain = None
200
201
        return cls.from_numpy(domain, *args, **kwargs)
202
203
    @classmethod
204
    def from_domain(cls, domain, n_rows=0, weights=False):
205
        """
206
        Construct a new `Table` with the given number of rows for the given
207
        domain. The optional vector of weights is initialized to 1's.
208
209
        :param domain: domain for the `Table`
210
        :type domain: Orange.data.Domain
211
        :param n_rows: number of rows in the new table
212
        :type n_rows: int
213
        :param weights: indicates whether to construct a vector of weights
214
        :type weights: bool
215
        :return: a new table
216
        :rtype: Orange.data.Table
217
        """
218
        #self = cls.__new__(Table)
219
        self = cls()
220
        self.domain = domain
221
        self.n_rows = n_rows
222
        self.X = np.zeros((n_rows, len(domain.attributes)))
223
        self.Y = np.zeros((n_rows, len(domain.class_vars)))
224
        if weights:
225
            self.W = np.ones(n_rows)
226
        else:
227
            self.W = np.empty((n_rows, 0))
228
        self.metas = np.empty((n_rows, len(self.domain.metas)), object)
229
        cls._init_ids(self)
230
        return self
231
232
    conversion_cache = None
233
234
    @classmethod
235
    def from_table(cls, domain, source, row_indices=...):
236
        """
237
        Create a new table from selected columns and/or rows of an existing
238
        one. The columns are chosen using a domain. The domain may also include
239
        variables that do not appear in the source table; they are computed
240
        from source variables if possible.
241
242
        The resulting data may be a view or a copy of the existing data.
243
244
        :param domain: the domain for the new table
245
        :type domain: Orange.data.Domain
246
        :param source: the source table
247
        :type source: Orange.data.Table
248
        :param row_indices: indices of the rows to include
249
        :type row_indices: a slice or a sequence
250
        :return: a new table
251
        :rtype: Orange.data.Table
252
        """
253
254
        def get_columns(row_indices, src_cols, n_rows, dtype=np.float64):
255
            if not len(src_cols):
256
                return np.zeros((n_rows, 0), dtype=source.X.dtype)
257
258
            n_src_attrs = len(source.domain.attributes)
259
            if all(isinstance(x, Integral) and 0 <= x < n_src_attrs
260
                   for x in src_cols):
261
                return _subarray(source.X, row_indices, src_cols)
262
            if all(isinstance(x, Integral) and x < 0 for x in src_cols):
263
                arr = _subarray(source.metas, row_indices,
264
                                 [-1 - x for x in src_cols])
265
                if arr.dtype != dtype:
266
                    return arr.astype(dtype)
267
                return arr
268
            if all(isinstance(x, Integral) and x >= n_src_attrs
269
                   for x in src_cols):
270
                return _subarray(source._Y, row_indices,
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
271
                                 [x - n_src_attrs for x in src_cols])
272
273
            a = np.empty((n_rows, len(src_cols)), dtype=dtype)
274
            for i, col in enumerate(src_cols):
275
                if col is None:
276
                    a[:, i] = Unknown
277
                elif not isinstance(col, Integral):
278
                    if row_indices is not ...:
279
                        a[:, i] = col(source)[row_indices]
280
                    else:
281
                        a[:, i] = col(source)
282
                elif col < 0:
283
                    a[:, i] = source.metas[row_indices, -1 - col]
284
                elif col < n_src_attrs:
285
                    a[:, i] = source.X[row_indices, col]
286
                else:
287
                    a[:, i] = source._Y[row_indices, col - n_src_attrs]
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
288
            return a
289
290
        new_cache = Table.conversion_cache is None
291
        try:
292
            if new_cache:
293
                Table.conversion_cache = {}
294
            else:
295
                cached = Table.conversion_cache.get((id(domain), id(source)))
296
                if cached:
297
                    return cached
298
            if domain == source.domain:
299
                return cls.from_table_rows(source, row_indices)
300
301
            if isinstance(row_indices, slice):
302
                start, stop, stride = row_indices.indices(source.X.shape[0])
303
                n_rows = (stop - start) // stride
304
                if n_rows < 0:
305
                    n_rows = 0
306
            elif row_indices is ...:
307
                n_rows = len(source.X)
308
            else:
309
                n_rows = len(row_indices)
310
311
            #self = cls.__new__(Table)
312
            #self = cls.__new__(cls)
313
            self = cls()
314
            self.domain = domain
315
            conversion = domain.get_conversion(source.domain)
316
            self.X = get_columns(row_indices, conversion.attributes, n_rows)
317
            if self.X.ndim == 1:
318
                self.X = self.X.reshape(-1, len(self.domain.attributes))
319
            self.Y = get_columns(row_indices, conversion.class_vars, n_rows)
320
321
            dtype = np.float64
322
            if any(isinstance(var, StringVariable) for var in domain.metas):
323
                dtype = np.object
324
            self.metas = get_columns(row_indices, conversion.metas,
325
                                     n_rows, dtype)
326
            if self.metas.ndim == 1:
327
                self.metas = self.metas.reshape(-1, len(self.domain.metas))
328
            if source.has_weights():
329
                self.W = np.array(source.W[row_indices])
330
            else:
331
                self.W = np.empty((n_rows, 0))
332
            self.name = getattr(source, 'name', '')
333
            if hasattr(source, 'ids'):
334
                self.ids = np.array(source.ids[row_indices])
335
            else:
336
                cls._init_ids(self)
337
            Table.conversion_cache[(id(domain), id(source))] = self
338
            return self
339
        finally:
340
            if new_cache:
341
                Table.conversion_cache = None
342
343
    @classmethod
344
    def from_table_rows(cls, source, row_indices):
345
        """
346
        Construct a new table by selecting rows from the source table.
347
348
        :param source: an existing table
349
        :type source: Orange.data.Table
350
        :param row_indices: indices of the rows to include
351
        :type row_indices: a slice or a sequence
352
        :return: a new table
353
        :rtype: Orange.data.Table
354
        """
355
        #self = cls.__new__(Table)
356
        self = cls()
357
        self.domain = source.domain
358
        self.X = source.X[row_indices]
0 ignored issues
show
The attribute X was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
359
        if self.X.ndim == 1:
360
            self.X = self.X.reshape(-1, len(self.domain.attributes))
0 ignored issues
show
The attribute X was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
361
        self.Y = source._Y[row_indices]
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
362
        self.metas = source.metas[row_indices]
0 ignored issues
show
The attribute metas was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
363
        if self.metas.ndim == 1:
364
            self.metas = self.metas.reshape(-1, len(self.domain.metas))
0 ignored issues
show
The attribute metas was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
365
        self.W = source.W[row_indices]
0 ignored issues
show
The attribute W was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
366
        self.name = getattr(source, 'name', '')
367
        self.ids = np.array(source.ids[row_indices])
0 ignored issues
show
The attribute ids was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
368
        return self
369
370
    @classmethod
371
    def from_numpy(cls, domain, X, Y=None, metas=None, W=None):
372
        """
373
        Construct a table from numpy arrays with the given domain. The number
374
        of variables in the domain must match the number of columns in the
375
        corresponding arrays. All arrays must have the same number of rows.
376
        Arrays may be of different numpy types, and may be dense or sparse.
377
378
        :param domain: the domain for the new table
379
        :type domain: Orange.data.Domain
380
        :param X: array with attribute values
381
        :type X: np.array
382
        :param Y: array with class values
383
        :type Y: np.array
384
        :param metas: array with meta attributes
385
        :type metas: np.array
386
        :param W: array with weights
387
        :type W: np.array
388
        :return:
389
        """
390
        X, Y, W = _check_arrays(X, Y, W, dtype='float64')
391
        metas, = _check_arrays(metas)
392
393
        if Y is not None and Y.ndim == 1:
394
            Y = Y.reshape(Y.shape[0], 1)
395
        if domain is None:
396
            domain = Domain.from_numpy(X, Y, metas)
397
398
        if Y is None:
399
            if sp.issparse(X):
400
                Y = np.empty((X.shape[0], 0), object)
401
            else:
402
                Y = X[:, len(domain.attributes):]
403
                X = X[:, :len(domain.attributes)]
404
        if metas is None:
405
            metas = np.empty((X.shape[0], 0), object)
406
        if W is None or W.size == 0:
407
            W = np.empty((X.shape[0], 0))
408
        else:
409
            W = W.reshape(W.size, 1)
410
411
        if X.shape[1] != len(domain.attributes):
412
            raise ValueError(
413
                "Invalid number of variable columns ({} != {})".format(
414
                    X.shape[1], len(domain.attributes))
415
            )
416
        if Y.shape[1] != len(domain.class_vars):
417
            raise ValueError(
418
                "Invalid number of class columns ({} != {})".format(
419
                    Y.shape[1], len(domain.class_vars))
420
            )
421
        if metas.shape[1] != len(domain.metas):
422
            raise ValueError(
423
                "Invalid number of meta attribute columns ({} != {})".format(
424
                    metas.shape[1], len(domain.metas))
425
            )
426
        if not X.shape[0] == Y.shape[0] == metas.shape[0] == W.shape[0]:
427
            raise ValueError(
428
                "Parts of data contain different numbers of rows.")
429
430
        #self = Table.__new__(Table)
431
        self = cls()
432
        self.domain = domain
433
        self.X = X
434
        self.Y = Y
435
        self.metas = metas
436
        self.W = W
437
        self.n_rows = self.X.shape[0]
438
        cls._init_ids(self)
439
        return self
440
441
    @classmethod
442
    def from_list(cls, domain, rows, weights=None):
443
        if weights is not None and len(rows) != len(weights):
444
            raise ValueError("mismatching number of instances and weights")
445
        self = cls.from_domain(domain, len(rows), weights is not None)
446
        attrs, classes = domain.attributes, domain.class_vars
447
        metas = domain.metas
448
        nattrs, ncls = len(domain.attributes), len(domain.class_vars)
449
        for i, row in enumerate(rows):
450
            if isinstance(row, Instance):
451
                row = row.list
452
            for j, (var, val) in enumerate(zip(attrs, row)):
453
                self.X[i, j] = var.to_val(val)
454
            for j, (var, val) in enumerate(zip(classes, row[nattrs:])):
455
                self._Y[i, j] = var.to_val(val)
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
456
            for j, (var, val) in enumerate(zip(metas, row[nattrs + ncls:])):
457
                self.metas[i, j] = var.to_val(val)
458
        if weights is not None:
459
            self.W = np.array(weights)
460
        return self
461
462
    @classmethod
463
    def _init_ids(cls, obj):
464
        with cls._next_instance_lock:
465
            obj.ids = np.array(range(cls._next_instance_id, cls._next_instance_id + obj.X.shape[0]))
466
            cls._next_instance_id += obj.X.shape[0]
467
468
    @classmethod
469
    def new_id(cls):
470
        with cls._next_instance_lock:
471
            id = cls._next_instance_id
0 ignored issues
show
Bug Best Practice introduced by
This seems to re-define the built-in id.

It is generally discouraged to redefine built-ins as this makes code very hard to read.

Loading history...
472
            cls._next_instance_id += 1
473
            return id
474
475
    FILE_FORMATS = {
476
        ".tab": (io.TabDelimFormat, )
477
    }
478
479
    def save(self, filename):
480
        """
481
        Save a data table to a file. The path can be absolute or relative.
482
483
        :param filename: File name
484
        :type filename: str
485
        """
486
        ext = os.path.splitext(filename)[1]
487
        writer = io.FileFormats.writers.get(ext)
488
        if not writer:
489
            desc = io.FileFormats.names.get(ext)
490
            if desc:
491
                raise IOError("Writing of {}s is not supported".
492
                    format(desc.lower()))
493
            else:
494
                raise IOError("Unknown file name extension.")
495
        writer().write_file(filename, self)
496
497
    @classmethod
498
    def from_file(cls, filename):
499
        """
500
        Read a data table from a file. The path can be absolute or relative.
501
502
        :param filename: File name
503
        :type filename: str
504
        :return: a new data table
505
        :rtype: Orange.data.Table
506
        """
507
        for dir in dataset_dirs:
0 ignored issues
show
Bug Best Practice introduced by
This seems to re-define the built-in dir.

It is generally discouraged to redefine built-ins as this makes code very hard to read.

Loading history...
508
            ext = os.path.splitext(filename)[1]
509
            absolute_filename = os.path.join(dir, filename)
510
            if not ext:
511
                for ext in io.FileFormats.readers:
512
                    if os.path.exists(absolute_filename + ext):
513
                        absolute_filename += ext
514
                        break
515
            if os.path.exists(absolute_filename):
516
                break
517
        else:
518
            absolute_filename = ext = ""
519
520
        if not os.path.exists(absolute_filename):
521
            raise IOError('File "{}" was not found.'.format(filename))
522
        reader = io.FileFormats.readers.get(ext)
523
        if not reader:
524
            desc = io.FileFormats.names.get(ext)
525
            if desc:
526
                raise IOError("Reading {}s is not supported".
527
                    format(desc.lower()))
528
            else:
529
                raise IOError("Unknown file name extension.")
530
        data = reader().read_file(absolute_filename, cls)
531
        data.name = os.path.splitext(os.path.split(filename)[-1])[0]
532
        # no need to call _init_ids as fuctions from .io already
533
        # construct a table with .ids
534
535
        data.__file__ = absolute_filename
536
        return data
537
538
    @classmethod
539
    def from_url(cls, url):
540
        name = os.path.basename(urllib.parse.urlparse(url)[2])
541
        f = tempfile.NamedTemporaryFile(suffix=name, delete=False)
542
        fname = f.name
543
        f.close()
544
        urllib.request.urlretrieve(url, fname)
545
        data = cls.from_file(f.name)
546
        os.remove(fname)
547
        return data
548
549
    # Helper function for __setitem__ and insert:
550
    # Set the row of table data matrices
551
    # noinspection PyProtectedMember
552
    def _set_row(self, example, row):
553
        domain = self.domain
554
        if isinstance(example, Instance):
555
            if example.domain == domain:
556
                if isinstance(example, RowInstance):
557
                    self.X[row] = example._x
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _x was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
558
                    self._Y[row] = example._y
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _y was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
559
                else:
560
                    self.X[row] = example._x
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _x was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
561
                    self._Y[row] = example._y
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _y was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
562
                self.metas[row] = example._metas
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _metas was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
563
                return
564
            c = self.domain.get_conversion(example.domain)
0 ignored issues
show
The variable c seems to be unused.
Loading history...
565
566
            self.X[row], self._Y[row], self.metas[row] = \
567
                self.domain.convert(example)
568
            try:
569
                self.ids[row] = example.id
570
            except:
0 ignored issues
show
Coding Style Best Practice introduced by
General except handlers without types should be used sparingly.

Typically, you would use general except handlers when you intend to specifically handle all types of errors, f.e. when logging. Otherwise, such general error handlers can mask errors in your application that you want to know of.

Loading history...
571
                with type(self)._next_instance_lock:
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _next_instance_lock was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
572
                    self.ids[row] = type(self)._next_instance_id
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _next_instance_id was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
573
                    type(self)._next_instance_id += 1
574
575
        else:
576
            self.X[row] = [var.to_val(val)
577
                           for var, val in zip(domain.attributes, example)]
578
            self._Y[row] = [var.to_val(val)
579
                            for var, val in
580
                            zip(domain.class_vars,
581
                                example[len(domain.attributes):])]
582
            self.metas[row] = np.array([var.Unknown for var in domain.metas],
583
                                       dtype=object)
584
585
    def _check_all_dense(self):
        """Return `True` if X, Y and metas are all dense or missing."""
        densities = (self.X_density(), self.Y_density(),
                     self.metas_density())
        for density in densities:
            if density not in (Storage.DENSE, Storage.MISSING):
                return False
        return True
589
590
    # A helper function for extend and insert
591
    # Resize X, Y, metas and W.
592
    def _resize_all(self, new_length):
        """
        Resize X, Y, metas, W and ids in place to `new_length` rows.

        If any of the resizes fails, the arrays that were already resized
        are restored to their previous length and the exception is re-raised.

        :param new_length: the new number of rows
        :raises ValueError: if any of X, Y or metas is sparse
        """
        old_length = self.X.shape[0]
        if old_length == new_length:
            return
        if not self._check_all_dense():
            raise ValueError("Tables with sparse data cannot be resized")
        try:
            self.X.resize(new_length, self.X.shape[1], refcheck=False)
            self._Y.resize(new_length, self._Y.shape[1], refcheck=False)
            self.metas.resize(new_length, self.metas.shape[1], refcheck=False)
            # A two-dimensional W is resized to zero columns; a
            # one-dimensional W is resized to the new length.
            if self.W.ndim == 2:
                self.W.resize((new_length, 0), refcheck=False)
            else:
                self.W.resize(new_length, refcheck=False)
            self.ids.resize(new_length, refcheck=False)
        except Exception:
            # Roll back only those arrays that already have the new length.
            if self.X.shape[0] == new_length:
                self.X.resize(old_length, self.X.shape[1], refcheck=False)
            if self._Y.shape[0] == new_length:
                self._Y.resize(old_length, self._Y.shape[1], refcheck=False)
            if self.metas.shape[0] == new_length:
                self.metas.resize(old_length, self.metas.shape[1], refcheck=False)
            if self.W.shape[0] == new_length:
                if self.W.ndim == 2:
                    self.W.resize((old_length, 0), refcheck=False)
                else:
                    self.W.resize(old_length, refcheck=False)
            if self.ids.shape[0] == new_length:
                self.ids.resize(old_length, refcheck=False)
            raise
622
623
    def __getitem__(self, key):
        """
        Return a row, a single value or a new table, depending on `key`.

        - an integer gives a `RowInstance`;
        - any other non-tuple key gives a table built by
          `Table.from_table_rows`;
        - a `(row, column)` pair with an integer row and a single column
          (name, index or variable) gives a `Value`;
        - every other pair gives a new table over the selected rows and
          columns.

        :raises IndexError: if a tuple key does not have exactly two items
        """
        if isinstance(key, Integral):
            return RowInstance(self, key)
        if not isinstance(key, tuple):
            return Table.from_table_rows(self, key)
        if len(key) != 2:
            raise IndexError("Table indices must be one- or two-dimensional")

        row_idx, col_idx = key
        if isinstance(row_idx, Integral):
            if not isinstance(col_idx, (str, Integral, Variable)):
                # One row, several columns: handled below as a row list.
                row_idx = [row_idx]
            else:
                col = self.domain.index(col_idx)
                var = self.domain[col]
                n_attributes = len(self.domain.attributes)
                # Negative indices address metas, then come attributes,
                # then class variables.
                if col < 0:
                    return Value(var, self.metas[row_idx, -1 - col])
                if col < n_attributes:
                    return Value(var, self.X[row_idx, col])
                return Value(var, self._Y[row_idx, col - n_attributes])

        # multiple rows OR single row but multiple columns:
        # construct a new table
        attributes, col_indices = self.domain._compute_col_indices(col_idx)
        if attributes is None:
            domain = self.domain
        else:
            n_attrs = len(self.domain.attributes)
            r_attrs, r_classes, r_metas = [], [], []
            for i, col in enumerate(col_indices):
                if col < 0:
                    r_metas.append(attributes[i])
                elif col < n_attrs:
                    r_attrs.append(attributes[i])
                else:
                    r_classes.append(attributes[i])
            domain = Domain(r_attrs, r_classes, r_metas)
        return self.__class__.from_table(domain, self, row_idx)
666
667
    def __setitem__(self, key, value):
668
        if not self._check_all_dense():
669
            raise ValueError(
670
                "Assignment to rows of sparse data is not supported")
671
        if not isinstance(key, tuple):
672
            if isinstance(value, Real):
673
                self.X[key, :] = value
674
                return
675
            self._set_row(value, key)
676
            return
677
678
        if len(key) != 2:
679
            raise IndexError("Table indices must be one- or two-dimensional")
680
        row_idx, col_idx = key
681
682
        # single row
683
        if isinstance(row_idx, Integral):
684
            if isinstance(col_idx, slice):
685
                col_idx = range(*slice.indices(col_idx, self.X.shape[1]))
686
            if not isinstance(col_idx, str) and isinstance(col_idx, Iterable):
687
                col_idx = list(col_idx)
688
            if not isinstance(col_idx, str) and isinstance(col_idx, Sized):
689
                if isinstance(value, (Sequence, np.ndarray)):
690
                    values = value
691
                elif isinstance(value, Iterable):
692
                    values = list(value)
693
                else:
694
                    raise TypeError("Setting multiple values requires a "
695
                                    "sequence or numpy array")
696
                if len(values) != len(col_idx):
697
                    raise ValueError("Invalid number of values")
698
            else:
699
                col_idx, values = [col_idx], [value]
700
            for value, col_idx in zip(values, col_idx):
701
                if not isinstance(value, Integral):
702
                    value = self.domain[col_idx].to_val(value)
703
                if not isinstance(col_idx, Integral):
704
                    col_idx = self.domain.index(col_idx)
705
                if col_idx >= 0:
706
                    if col_idx < self.X.shape[1]:
707
                        self.X[row_idx, col_idx] = value
708
                    else:
709
                        self._Y[row_idx, col_idx - self.X.shape[1]] = value
710
                else:
711
                    self.metas[row_idx, -1 - col_idx] = value
712
713
        # multiple rows, multiple columns
714
        attributes, col_indices = self.domain._compute_col_indices(col_idx)
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _compute_col_indices was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
715
        if col_indices is ...:
716
            col_indices = range(len(self.domain))
717
        n_attrs = self.X.shape[1]
718
        if isinstance(value, str):
719
            if not attributes:
720
                attributes = self.domain.attributes
721
            for var, col in zip(attributes, col_indices):
722
                if 0 <= col < n_attrs:
723
                    self.X[row_idx, col] = var.to_val(value)
724
                elif col >= n_attrs:
725
                    self._Y[row_idx, col - n_attrs] = var.to_val(value)
726
                else:
727
                    self.metas[row_idx, -1 - col] = var.to_val(value)
728
        else:
729
            attr_cols = np.fromiter(
730
                (col for col in col_indices if 0 <= col < n_attrs), int)
731
            class_cols = np.fromiter(
732
                (col - n_attrs for col in col_indices if col >= n_attrs), int)
733
            meta_cols = np.fromiter(
734
                (-1 - col for col in col_indices if col < 0), int)
735
            if value is None:
736
                value = Unknown
737
738
            if not isinstance(value, Real) and \
739
                    (len(attr_cols) or len(class_cols)):
740
                raise TypeError(
741
                    "Ordinary attributes can only have primitive values")
742
            if len(attr_cols):
743
                if len(attr_cols) == 1:
744
                    # scipy.sparse matrices only allow primitive indices.
745
                    attr_cols = attr_cols[0]
746
                self.X[row_idx, attr_cols] = value
747
            if len(class_cols):
748
                if len(class_cols) == 1:
749
                    # scipy.sparse matrices only allow primitive indices.
750
                    class_cols = class_cols[0]
751
                self._Y[row_idx, class_cols] = value
752
            if len(meta_cols):
753
                self.metas[row_idx, meta_cols] = value
754
755
    def __delitem__(self, key):
756
        if not self._check_all_dense():
757
            raise ValueError("Rows of sparse data cannot be deleted")
758
        if key is ...:
759
            key = range(len(self))
760
        self.X = np.delete(self.X, key, axis=0)
0 ignored issues
show
The attribute X was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
761
        self.Y = np.delete(self._Y, key, axis=0)
762
        self.metas = np.delete(self.metas, key, axis=0)
0 ignored issues
show
The attribute metas was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
763
        self.W = np.delete(self.W, key, axis=0)
0 ignored issues
show
The attribute W was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
764
765
    def __len__(self):
766
        return self.X.shape[0]
767
768
    def __str__(self):
769
        return "[" + ",\n ".join(str(ex) for ex in self)
770
771
    def __repr__(self):
772
        s = "[" + ",\n ".join(repr(ex) for ex in self[:5])
773
        if len(self) > 5:
774
            s += ",\n ..."
775
        s += "\n]"
776
        return s
777
778
    def clear(self):
        """
        Remove all rows from the table.

        :raises ValueError: if the table contains sparse data
        """
        if self._check_all_dense():
            del self[Ellipsis]
            return
        raise ValueError("Tables with sparse data cannot be cleared")
783
784
    def append(self, instance):
        """
        Add a data instance after the last row of the table.

        :param instance: a data instance
        :type instance: Orange.data.Instance or a sequence of values
        """
        end = len(self)
        self.insert(end, instance)
792
793
    def insert(self, row, instance):
        """
        Insert a data instance into the table before position `row`.

        The table is enlarged by one row, the rows from `row` on are shifted
        down and the instance is stored in the freed row. If storing fails,
        the shift and the resize are undone and the exception is re-raised.

        :param row: row index; negative indices count from the end
        :type row: int
        :param instance: a data instance
        :type instance: Orange.data.Instance or a sequence of values
        :raises IndexError: if `row` is out of range
        """
        if row < 0:
            row += len(self)
        if not 0 <= row <= len(self):
            raise IndexError("Index out of range")
        self._resize_all(len(self) + 1)
        if row < len(self):
            # Shift the tail of every array one row down.
            for arr in (self.X, self._Y, self.metas, self.W, self.ids):
                arr[row + 1:] = arr[row:-1]
        try:
            self._set_row(instance, row)
            if self.W.shape[-1]:
                self.W[row] = 1
        except Exception:
            # Undo the shift and drop the extra row again.
            for arr in (self.X, self._Y, self.metas, self.W, self.ids):
                arr[row:-1] = arr[row + 1:]
            self._resize_all(len(self) - 1)
            raise
825
826
    def extend(self, instances):
827
        """
828
        Extend the table with the given instances. The instances can be given
829
        as a table of the same or a different domain, or a sequence. In the
830
        latter case, each instances can be given as
831
        :obj:`~Orange.data.Instance` or a sequence of values (e.g. list,
832
        tuple, numpy.array).
833
834
        :param instances: additional instances
835
        :type instances: Orange.data.Table or a sequence of instances
836
        """
837
        old_length = len(self)
838
        self._resize_all(old_length + len(instances))
839
        try:
840
            # shortcut
841
            if isinstance(instances, Table) and instances.domain == self.domain:
842
                self.X[old_length:] = instances.X
843
                self._Y[old_length:] = instances._Y
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _Y was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
844
                self.metas[old_length:] = instances.metas
845
                if self.W.shape[-1]:
846
                    if instances.W.shape[-1]:
847
                        self.W[old_length:] = instances.W
848
                    else:
849
                        self.W[old_length:] = 1
850
                self.ids[old_length:] = instances.ids
851
            else:
852
                for i, example in enumerate(instances):
853
                    self[old_length + i] = example
854
                    try:
855
                        self.ids[old_length + i] = example.id
856
                    except AttributeError:
857
                        self.ids[old_length + i] = self.new_id()
858
        except Exception:
859
            self._resize_all(old_length)
860
            raise
861
862
    @staticmethod
863
    def concatenate(tables, axis=1):
864
        """Return concatenation of `tables` by `axis`."""
865
        if not tables:
866
            raise ValueError('need at least one table to concatenate')
867
        if 1 == len(tables):
868
            return tables[0].copy()
869
        CONCAT_ROWS, CONCAT_COLS = 0, 1
870
        if axis == CONCAT_ROWS:
871
            table = tables[0].copy()
872
            for t in tables[1:]:
873
                table.extend(t)
874
            return table
875
        elif axis == CONCAT_COLS:
876
            from operator import iand, attrgetter
877
            from functools import reduce
0 ignored issues
show
Comprehensibility Bug introduced by
reduce is re-defining a name which is already available in the outer-scope (previously defined on line 7).

It is generally a bad practice to shadow variables from the outer-scope. In most cases, this is done unintentionally and might lead to unexpected behavior:

param = 5

class Foo:
    def __init__(self, param):   # "param" would be flagged here
        self.param = param
Loading history...
The import reduce was already done on line 7. You should be able to
remove this line.
Loading history...
878
            if reduce(iand,
879
                      (set(map(attrgetter('name'),
880
                               chain(t.domain.variables, t.domain.metas)))
881
                       for t in tables)):
882
                raise ValueError('Concatenating two domains with variables '
883
                                 'with same name is undefined')
884
            domain = Domain(flatten(t.domain.attributes for t in tables),
885
                            flatten(t.domain.class_vars for t in tables),
886
                            flatten(t.domain.metas for t in tables))
887
888
            def ndmin(A):
889
                return A if A.ndim > 1 else A.reshape(A.shape[0], 1)
890
891
            table = Table.from_numpy(domain,
892
                                     np.hstack(tuple(ndmin(t.X) for t in tables)),
893
                                     np.hstack(tuple(ndmin(t.Y) for t in tables)),
894
                                     np.hstack(tuple(ndmin(t.metas) for t in tables)),
895
                                     np.hstack(tuple(ndmin(t.W) for t in tables)))
896
            return table
897
        raise ValueError('axis {} out of bounds [0, 2)'.format(axis))
898
899
    def is_view(self):
        """
        Return `True` if every array that has columns is a view, i.e. has a
        `base`; arrays without columns are ignored.
        """
        if self.X.shape[-1] and self.X.base is None:
            return False
        if self._Y.shape[-1] and self._Y.base is None:
            return False
        if self.metas.shape[-1] and self.metas.base is None:
            return False
        if self._weights.shape[-1] and self.W.base is None:
            return False
        return True
907
908
    def is_copy(self):
        """
        Return `True` if the table owns its data, i.e. none of its arrays
        has a `base` (X is ignored when it has no columns).
        """
        if self.X.shape[-1] and self.X.base is not None:
            return False
        if self._Y.base is not None:
            return False
        if self.metas.base is not None:
            return False
        if self.W.base is not None:
            return False
        return True
916
917
    def ensure_copy(self):
        """
        Make the table own its data: every array among X, Y, metas and W
        that has a `base` is replaced by a copy.
        """
        for name in ("X", "_Y", "metas", "W"):
            array = getattr(self, name)
            if array.base is not None:
                setattr(self, name, array.copy())
0 ignored issues
show
The attribute W was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
929
930
    def copy(self):
        """
        Return a new `Table` constructed from this one that owns its data.
        """
        duplicate = Table(self)
        duplicate.ensure_copy()
        return duplicate
937
938
    @staticmethod
    def __determine_density(data):
        """
        Return the `Storage` constant describing how `data` is stored.

        :param data: a numpy array, a scipy.sparse matrix or `None`
        :return: `Storage.MISSING` for `None`, `Storage.DENSE` for
            non-sparse data, `Storage.SPARSE_BOOL` or `Storage.SPARSE` for
            sparse matrices
        """
        if data is None:
            return Storage.MISSING
        if not sp.issparse(data):
            return Storage.DENSE
        try:
            if bn.bincount(data.data, 1)[0][0] == 0:
                return Storage.SPARSE_BOOL
        except ValueError:
            # NOTE(review): presumably raised when the stored values are
            # not suitable for bincount; such data counts as general sparse.
            pass
        return Storage.SPARSE
951
952
    def X_density(self):
        """Return the storage type of X; computed once and then cached."""
        if hasattr(self, "_X_density"):
            return self._X_density
        density = Table.__determine_density(self.X)
        self._X_density = density
        return density
956
957
    def Y_density(self):
        """Return the storage type of Y; computed once and then cached."""
        if hasattr(self, "_Y_density"):
            return self._Y_density
        density = Table.__determine_density(self._Y)
        self._Y_density = density
        return density
961
962
    def metas_density(self):
        """Return the storage type of metas; computed once and then cached."""
        if hasattr(self, "_metas_density"):
            return self._metas_density
        density = Table.__determine_density(self.metas)
        self._metas_density = density
        return density
966
967
    def set_weights(self, weight=1):
        """
        Assign `weight` to all data instances. A new weight vector with one
        entry per row is allocated first if the table has no weights yet.
        """
        has_weight_vector = self.W.shape[-1] != 0
        if not has_weight_vector:
            self.W = np.empty(len(self))
        self.W[:] = weight
974
975
    def has_weights(self):
        """Return `True` if the last dimension of W is not empty."""
        return bool(self.W.shape[-1])
978
979
    def total_weight(self):
        """
        Return the sum of instance weights; for tables without weights,
        return the number of instances.
        """
        if not self.W.shape[-1]:
            return len(self)
        return sum(self.W)
987
988
    def has_missing(self):
        """Return `True` if X or Y contains any missing (nan) value."""
        x_has_nan = bn.anynan(self.X)
        if x_has_nan:
            return x_has_nan
        return bn.anynan(self._Y)
991
992
    def has_missing_class(self):
        """Return `True` if Y contains any missing (nan) value."""
        class_has_nan = bn.anynan(self._Y)
        return class_has_nan
995
996
    def checksum(self, include_metas=True):
        """
        Return an Adler-32 checksum chained over X, Y, (optionally) metas
        and W, in this order.

        :param include_metas: whether metas contribute to the checksum
        """
        # TODO: zlib.adler32 does not work for numpy arrays with dtype object
        # (after pickling and unpickling such arrays, checksum changes)
        # Why, and should we fix it or remove it?
        rest = [self._Y]
        if include_metas:
            rest.append(self.metas)
        rest.append(self.W)
        cs = zlib.adler32(np.ascontiguousarray(self.X))
        for part in rest:
            cs = zlib.adler32(np.ascontiguousarray(part), cs)
        return cs
1007
1008
    def shuffle(self):
        """
        Randomly shuffle the rows of the table.

        The same random permutation is applied to X, Y, metas, W and ids,
        so that every row keeps its values, weight and id.

        :raises ValueError: if the table contains sparse data
        """
        if not self._check_all_dense():
            raise ValueError("Rows of sparse data cannot be shuffled")
        ind = np.arange(self.X.shape[0])
        np.random.shuffle(ind)
        self.X = self.X[ind]
        self._Y = self._Y[ind]
        self.metas = self.metas[ind]
        self.W = self.W[ind]
        # ids must follow their rows, otherwise they get out of sync.
        self.ids = self.ids[ind]
0 ignored issues
show
The attribute W was defined outside __init__.

It is generally a good practice to initialize all attributes to default values in the __init__ method:

class Foo:
    def __init__(self, x=None):
        self.x = x
Loading history...
1018
1019
    def get_column_view(self, index):
        """
        Return one column of the table together with a flag telling whether
        the column came from a sparse matrix.

        For dense storage the column is the numpy slice itself (not a copy).
        For sparse storage the column slice is converted to a dense
        one-dimensional array; note that vertical slicing of sparse matrices
        is inefficient.

        Non-negative indices below `X.shape[1]` address `X`, larger ones
        address `_Y`, and negative indices address `metas` (`-1` is the first
        meta column).

        :param index: the index of the column
        :type index: int, str or Orange.data.Variable
        :return: (one-dimensional numpy array, sparse)
        """

        def as_vector(block):
            if not sp.issparse(block):
                return block, False
            # Densify the (n, 1) sparse slice and flatten it to 1-D.
            return np.asarray(block.todense())[:, 0], True

        if not isinstance(index, Integral):
            index = self.domain.index(index)
        if index < 0:
            return as_vector(self.metas[:, -1 - index])
        n_attrs = self.X.shape[1]
        if index < n_attrs:
            return as_vector(self.X[:, index])
        return as_vector(self._Y[:, index - n_attrs])
1045
1046
    def _filter_is_defined(self, columns=None, negate=False):
        """
        Return a table with the rows in which the checked values are defined.

        With `columns=None` a row is dropped if any value in `X` or `_Y` is
        missing. Otherwise only the listed columns are checked; each is
        fetched with `get_column_view`.

        :param columns: columns to check, or `None` for all of `X` and `_Y`
        :param negate: if true, keep the rows that would otherwise be dropped
        :return: the result of `Table.from_table_rows(self, retain)`
        """

        def incomplete_sparse_rows(M):
            # Row i of a CSR matrix stores indptr[i + 1] - indptr[i] entries;
            # the row is complete only if that equals the number of columns.
            # (The previous code compared against indptr[-1:], i.e. the last
            # element broadcast over all rows, which flagged every row.)
            # Assumes CSR layout, as the original indptr-based test did.
            return (M.indptr[1:] - M.indptr[:-1]) != M.shape[1]

        if columns is None:
            if sp.issparse(self.X):
                remove = incomplete_sparse_rows(self.X)
            else:
                remove = bn.anynan(self.X, axis=1)
            if sp.issparse(self._Y):
                remove = np.logical_or(remove,
                                       incomplete_sparse_rows(self._Y))
            else:
                remove = np.logical_or(remove, bn.anynan(self._Y, axis=1))
        else:
            remove = np.zeros(len(self), dtype=bool)
            for column in columns:
                col, sparse = self.get_column_view(column)
                if sparse:
                    # In a densified sparse column, 0 marks an unstored value.
                    remove = np.logical_or(remove, col == 0)
                else:
                    remove = np.logical_or(remove, bn.anynan([col], axis=0))
        retain = remove if negate else np.logical_not(remove)
        return Table.from_table_rows(self, retain)
1068
1069
    def _filter_has_class(self, negate=False):
        """
        Return a table with the rows whose class values are all defined.

        :param negate: if true, return the rows with a missing class instead
        :return: the result of `Table.from_table_rows(self, retain)`
        """
        if sp.issparse(self._Y):
            # Row i of a CSR matrix stores indptr[i + 1] - indptr[i] entries;
            # all class values are present when that equals the column count.
            # (The previous code used indptr[-1:], the broadcast last element,
            # so the comparison could never match for a non-empty _Y.)
            # Assumes CSR layout, as the original indptr-based test did.
            stored = self._Y.indptr[1:] - self._Y.indptr[:-1]
            if negate:
                retain = stored != self._Y.shape[1]
            else:
                retain = stored == self._Y.shape[1]
        else:
            retain = bn.anynan(self._Y, axis=1)
            if not negate:
                retain = np.logical_not(retain)
        return Table.from_table_rows(self, retain)
1082
1083
    def _filter_same_value(self, column, value, negate=False):
        """
        Return a table with the rows whose `column` equals `value`.

        A `value` that is not a real number is first converted with the
        column variable's `to_val`.

        :param negate: if true, select the rows that do not match
        :return: the result of `Table.from_table_rows(self, sel)`
        """
        if not isinstance(value, Real):
            value = self.domain[column].to_val(value)
        column_data = self.get_column_view(column)[0]
        matches = column_data == value
        sel = np.logical_not(matches) if negate else matches
        return Table.from_table_rows(self, sel)
1090
1091
    def _filter_values(self, filter):
        """
        Return a table with the rows selected by a value filter.

        `filter` is either a `data_filter.Values` instance, whose
        `conditions` are combined according to its `conjunction` flag, or a
        single condition, which is treated as a one-element conjunction.
        Each condition yields a boolean mask over the rows; masks are merged
        into `sel` in place with `*=` (logical and, for a conjunction) or
        `+=` (logical or, for a disjunction). If `filter.negate` is set, the
        final mask is inverted.

        :param filter: the filter to apply (the name shadows the built-in
            but is kept, since it is part of the call signature)
        :return: the result of `Table.from_table_rows(self, sel)`
        :raises TypeError: for an unknown condition type or operator
        """
        from Orange.data import filter as data_filter

        if isinstance(filter, data_filter.Values):
            conditions = filter.conditions
            conjunction = filter.conjunction
        else:
            conditions = [filter]
            conjunction = True
        # Neutral element: all-True for "and", all-False for "or".
        if conjunction:
            sel = np.ones(len(self), dtype=bool)
        else:
            sel = np.zeros(len(self), dtype=bool)

        for f in conditions:
            col = self.get_column_view(f.column)[0]
            if (isinstance(f, data_filter.FilterDiscrete)
                    and f.values is None) \
                    or (isinstance(f, data_filter.FilterContinuous)
                        and f.oper == f.IsDefined):
                if conjunction:
                    sel *= ~np.isnan(col)
                else:
                    sel += ~np.isnan(col)
            elif isinstance(f, data_filter.FilterString) and \
                    f.oper == f.IsDefined:
                if conjunction:
                    sel *= (col != "")
                else:
                    sel += (col != "")
            elif isinstance(f, data_filter.FilterDiscrete):
                if conjunction:
                    # A row passes if it matches any of the listed values.
                    s2 = np.zeros(len(self), dtype=bool)
                    for val in f.values:
                        if not isinstance(val, Real):
                            val = self.domain[f.column].to_val(val)
                        s2 += (col == val)
                    sel *= s2
                else:
                    for val in f.values:
                        if not isinstance(val, Real):
                            val = self.domain[f.column].to_val(val)
                        sel += (col == val)
            elif isinstance(f, data_filter.FilterStringList):
                if not f.case_sensitive:
                    # noinspection PyTypeChecker
                    col = np.char.lower(np.array(col, dtype=str))
                    vals = [val.lower() for val in f.values]
                else:
                    vals = f.values
                if conjunction:
                    sel *= reduce(operator.add,
                                  (col == val for val in vals))
                else:
                    sel = reduce(operator.add,
                                 (col == val for val in vals), sel)
            elif isinstance(f, data_filter.FilterRegex):
                # Merge the regex result into `sel` like every other
                # condition; assigning it directly would discard the
                # conditions processed so far. `otypes` keeps the mask
                # boolean and lets np.vectorize handle an empty column.
                matches = np.vectorize(f, otypes=[bool])(col)
                if conjunction:
                    sel *= matches
                else:
                    sel += matches
            elif isinstance(f, (data_filter.FilterContinuous,
                                data_filter.FilterString)):
                if (isinstance(f, data_filter.FilterString) and
                        not f.case_sensitive):
                    # noinspection PyTypeChecker
                    col = np.char.lower(np.array(col, dtype=str))
                    fmin = f.min.lower()
                    # The upper bound is only needed by the range operators.
                    if f.oper in [f.Between, f.Outside]:
                        fmax = f.max.lower()
                    else:
                        fmax = None
                else:
                    fmin, fmax = f.min, f.max
                if f.oper == f.Equal:
                    col = (col == fmin)
                elif f.oper == f.NotEqual:
                    col = (col != fmin)
                elif f.oper == f.Less:
                    col = (col < fmin)
                elif f.oper == f.LessEqual:
                    col = (col <= fmin)
                elif f.oper == f.Greater:
                    col = (col > fmin)
                elif f.oper == f.GreaterEqual:
                    col = (col >= fmin)
                elif f.oper == f.Between:
                    col = (col >= fmin) * (col <= fmax)
                elif f.oper == f.Outside:
                    col = (col < fmin) + (col > fmax)
                elif not isinstance(f, data_filter.FilterString):
                    # The remaining operators exist only for string filters.
                    raise TypeError("Invalid operator")
                elif f.oper == f.Contains:
                    col = np.fromiter((fmin in e for e in col),
                                      dtype=bool)
                elif f.oper == f.StartsWith:
                    col = np.fromiter((e.startswith(fmin) for e in col),
                                      dtype=bool)
                elif f.oper == f.EndsWith:
                    col = np.fromiter((e.endswith(fmin) for e in col),
                                      dtype=bool)
                else:
                    raise TypeError("Invalid operator")
                if conjunction:
                    sel *= col
                else:
                    sel += col
            else:
                raise TypeError("Invalid filter")

        if filter.negate:
            sel = ~sel
        return Table.from_table_rows(self, sel)
1198
1199
    def _compute_basic_stats(self, columns=None,
                             include_metas=False, compute_variance=False):
        """
        Compute per-column statistics with `bn.stats`.

        Without `columns`, statistics for `X`, `_Y` and (if `include_metas`)
        `metas` are stacked vertically into one array; an empty list is
        returned when the domain has none of these parts. With `columns`,
        a list holding one row of statistics per requested column is
        returned, in the order the columns were given.

        :param columns: columns to describe, or a false value for all
        :param include_metas: include `metas` when `columns` is not given
        :param compute_variance: not supported
        :raises NotImplementedError: if `compute_variance` is true
        """
        if compute_variance:
            raise NotImplementedError(
                "computation of variance is not implemented yet")
        weights = self.W if self.has_weights() else None

        if not columns:
            parts = []
            if self.domain.attributes:
                parts.append(bn.stats(self.X, weights))
            if self.domain.class_vars:
                parts.append(bn.stats(self._Y, weights))
            if include_metas and self.domain.metas:
                parts.append(bn.stats(self.metas, weights))
            return np.vstack(tuple(parts)) if len(parts) else []

        indices = [self.domain.index(c) for c in columns]
        nattrs = len(self.domain.attributes)
        # Compute statistics only for the parts that are actually requested.
        x_stats = (bn.stats(self.X, weights)
                   if any(0 <= c < nattrs for c in indices) else False)
        y_stats = (bn.stats(self._Y, weights)
                   if any(c >= nattrs for c in indices) else False)
        m_stats = (bn.stats(self.metas, weights)
                   if any(c < 0 for c in indices) else False)

        def stats_row(idx):
            if idx < 0:
                return m_stats[-1 - idx]
            if idx < nattrs:
                return x_stats[idx, :]
            return y_stats[idx - nattrs, :]

        return [stats_row(idx) for idx in indices]
1230
1231
    def _compute_distributions(self, columns=None):
        """
        Compute the value distribution of each requested column.

        For a discrete variable the distribution comes from `bn.bincount`;
        for other variables the (value, weight) pairs are sorted by value and
        passed to `_valuecount.valuecount`.

        :param columns: columns to process; `None` means all variables of
            the domain (attributes and class variables)
        :return: a list of `(distribution, unknowns)` pairs, one per column
        """
        # NOTE(review): a negative (meta) index satisfies
        # `col < self.X.shape[1]` below and is therefore looked up in X --
        # confirm whether meta columns are meant to be supported here.

        def _get_matrix(M, cachedM, col):
            """Return (column data, matching weights, CSC cache) for `M`."""
            if not sp.issparse(M):
                return M[:, col], self.W if self.has_weights() else None, None
            if cachedM is None:
                if single_column:
                    # warnings.warn takes the message first, then category.
                    warn("computing distributions on sparse data "
                         "for a single column is inefficient",
                         ResourceWarning)
                # Convert the matrix that was passed in (X or _Y), not
                # always X, so that columns of a sparse _Y are read from _Y.
                cachedM = sp.csc_matrix(M)
            start, stop = cachedM.indptr[col], cachedM.indptr[col + 1]
            data = cachedM.data[start:stop]
            if self.has_weights():
                # Weights of the rows that store a value in this column.
                weights = self.W[cachedM.indices[start:stop]]
            else:
                weights = None
            return data, weights, cachedM

        if columns is None:
            columns = range(len(self.domain.variables))
            single_column = False
        else:
            columns = [self.domain.index(var) for var in columns]
            single_column = len(columns) == 1 and len(self.domain) > 1
        distributions = []
        Xcsc = Ycsc = None
        for col in columns:
            var = self.domain[col]
            if col < self.X.shape[1]:
                m, W, Xcsc = _get_matrix(self.X, Xcsc, col)
            else:
                m, W, Ycsc = _get_matrix(self._Y, Ycsc, col - self.X.shape[1])
            if var.is_discrete:
                if W is not None:
                    W = W.ravel()
                dist, unknowns = bn.bincount(m, len(var.values) - 1, W)
            elif not len(m):
                dist, unknowns = np.zeros((2, 0)), 0
            else:
                if W is not None:
                    ranks = np.argsort(m)
                    vals = np.vstack((m[ranks], W[ranks].flatten()))
                    unknowns = bn.countnans(m, W)
                else:
                    # Unweighted: every value counts with weight 1.
                    vals = np.ones((2, m.shape[0]))
                    vals[0, :] = m
                    vals[0, :].sort()
                    unknowns = bn.countnans(m)
                dist = np.array(_valuecount.valuecount(vals))
            distributions.append((dist, unknowns))

        return distributions
1284
1285
    def _compute_contingency(self, col_vars=None, row_var=None):
        """
        Compute contingencies of the column variables against a row variable.

        Rows whose row-variable value is NaN are excluded from all
        contingencies and reported separately as `unknown_rows`.

        :param col_vars: column variables; `None` means all variables of the
            domain (attributes and class variables)
        :param row_var: the row variable; defaults to `domain.class_var`
        :return: `(contingencies, unknown_rows)`, where `contingencies` has
            one entry per column variable and `unknown_rows` is the number
            (or, for weighted data, the total weight) of excluded rows
        :raises ValueError: if there is no row variable, or a column variable
            is neither discrete nor continuous
        :raises TypeError: if the row variable is not discrete
        """
        n_atts = self.X.shape[1]

        if col_vars is None:
            col_vars = range(len(self.domain.variables))
        else:
            col_vars = [self.domain.index(var) for var in col_vars]
        if row_var is None:
            row_var = self.domain.class_var
            if row_var is None:
                raise ValueError("No row variable")

        row_desc = self.domain[row_var]
        if not row_desc.is_discrete:
            raise TypeError("Row variable must be discrete")
        row_indi = self.domain.index(row_var)
        n_rows = len(row_desc.values)
        if 0 <= row_indi < n_atts:
            row_data = self.X[:, row_indi]
        elif row_indi < 0:
            row_data = self.metas[:, -1 - row_indi]
        else:
            row_data = self._Y[:, row_indi - n_atts]

        W = self.W if self.has_weights() else None
        nan_inds = None

        col_desc = [self.domain[var] for var in col_vars]
        col_indi = [self.domain.index(var) for var in col_vars]

        if any(not (var.is_discrete or var.is_continuous)
               for var in col_desc):
            raise ValueError("contingency can be computed only for discrete "
                             "and continuous values")

        # meta attributes can be stored as type object
        if row_data.dtype.kind != "f":
            row_data = row_data.astype(float)

        unknown_rows = bn.countnans(row_data)
        if unknown_rows:
            nan_inds = np.isnan(row_data)
            row_data = row_data[~nan_inds]
            # `if W:` would raise for a multi-element array, hence the
            # explicit None test.
            if W is not None:
                # Sum the weights of the dropped rows before W is filtered;
                # afterwards the mask no longer matches W's length.
                unknown_rows = np.sum(W[nan_inds])
                W = W[~nan_inds]

        contingencies = [None] * len(col_desc)
        # Each entry: (storage array, "index belongs to this array",
        # "domain index -> column within this array").
        for arr, f_cond, f_ind in (
                (self.X, lambda i: 0 <= i < n_atts, lambda i: i),
                (self._Y, lambda i: i >= n_atts, lambda i: i - n_atts),
                (self.metas, lambda i: i < 0, lambda i: -1 - i)):

            if nan_inds is not None:
                arr = arr[~nan_inds]

            arr_indi = [e for e, ind in enumerate(col_indi) if f_cond(ind)]

            # (position in the result, column in `arr`, variable descriptor)
            col_specs = [(e, f_ind(col_indi[e]), col_desc[e])
                         for e in arr_indi]
            disc_vars = [v for v in col_specs if v[2].is_discrete]
            if disc_vars:
                if sp.issparse(arr):
                    max_vals = max(len(v[2].values) for v in disc_vars)
                    disc_indi = {i for _, i, _ in disc_vars}
                    mask = [i in disc_indi for i in range(arr.shape[1])]
                    conts, nans = bn.contingency(arr, row_data, max_vals - 1,
                                                 n_rows - 1, W, mask)
                    for col_i, arr_i, _ in disc_vars:
                        contingencies[col_i] = (conts[arr_i], nans[arr_i])
                else:
                    for col_i, arr_i, var in disc_vars:
                        contingencies[col_i] = bn.contingency(
                            arr[:, arr_i], row_data,
                            len(var.values) - 1, n_rows - 1, W)

            cont_vars = [v for v in col_specs if v[2].is_continuous]
            if cont_vars:

                classes = row_data.astype(dtype=np.int8)
                if W is not None:
                    W = W.astype(dtype=np.float64)
                if sp.issparse(arr):
                    arr = sp.csc_matrix(arr)

                for col_i, arr_i, _ in cont_vars:
                    if sp.issparse(arr):
                        # Stored entries of column arr_i in the CSC matrix.
                        start = arr.indptr[arr_i]
                        stop = arr.indptr[arr_i + 1]
                        col_data = arr.data[start:stop]
                        rows = arr.indices[start:stop]
                        W_ = None if W is None else W[rows]
                        classes_ = classes[rows]
                    else:
                        col_data, W_, classes_ = arr[:, arr_i], W, classes

                    col_data = col_data.astype(dtype=np.float64)
                    U, C, unknown = _contingency.contingency_floatarray(
                        col_data, classes_, n_rows, W_)
                    contingencies[col_i] = ([U, C], unknown)

        return contingencies, unknown_rows
1386
1387
1388
def _check_arrays(*arrays, dtype=None):
    """
    Validate a group of arrays that must share their leading dimension.

    `None` entries are passed through unchanged. Dense inputs are converted
    with `np.asarray` (to `dtype`, if given); for sparse matrices the `data`
    attribute is converted in place with `np.asarray`.

    :param arrays: arrays, sparse matrices, sequences or `None`
    :param dtype: optional dtype for the dense arrays
    :return: list of the checked (possibly converted) arrays
    :raises ValueError: if an array's number of rows differs from that of
        the first argument, or an array contains infinity
    """
    checked = []
    if not len(arrays):
        return checked

    def ninstances(array):
        if hasattr(array, "shape"):
            return array.shape[0]
        else:
            return len(array) if array is not None else 0

    shape_1 = ninstances(arrays[0])

    for array in arrays:
        if array is None:
            checked.append(array)
            continue

        n_rows = ninstances(array)
        if n_rows != shape_1:
            # Report n_rows rather than len(array): len() raises TypeError
            # for scipy sparse matrices and would mask this error.
            raise ValueError("Leading dimension mismatch (%d != %d)"
                             % (n_rows, shape_1))

        if sp.issparse(array):
            array.data = np.asarray(array.data)
            has_inf = _check_inf(array.data)
        else:
            if dtype is not None:
                array = np.asarray(array, dtype=dtype)
            else:
                array = np.asarray(array)
            has_inf = _check_inf(array)

        if has_inf:
            raise ValueError("Array contains infinity.")
        checked.append(array)

    return checked
1425
1426
1427
def _check_inf(array):
    """Return a true value if a float-typed `array` contains infinity.

    Arrays whose dtype is not one of numpy's 'AllFloat' type codes are not
    inspected and yield `False`.
    """
    if array.dtype.char not in np.typecodes['AllFloat']:
        return False
    return np.isinf(array.data).any()
1430
1431
1432
def _subarray(arr, rows, cols):
    """Return `arr` indexed by the `rows` x `cols` cross product."""
    cross_index = _rxc_ix(rows, cols)
    return arr[cross_index]
1434
1435
1436
def _rxc_ix(rows, cols):
    """
    Construct an index object to index the `rows` x `cols` cross product.

    Rows and columns can be a 1d bool or int sequence, a slice or an
    Ellipsis (`...`). The latter is a convenience and is interpreted the
    same as `slice(None, None, 1)`.

    Parameters
    ----------
    rows : 1D sequence, slice or Ellipsis
        Row indices.
    cols : 1D sequence, slice or Ellipsis
        Column indices.

    See Also
    --------
    numpy.ix_

    Examples
    --------
    >>> a = np.arange(10).reshape(2, 5)
    >>> a[_rxc_ix([0, 1], [3, 4])]
    array([[3, 4],
           [8, 9]])
    >>> a[_rxc_ix([False, True], ...)]
    array([[5, 6, 7, 8, 9]])

    """
    if rows is Ellipsis:
        rows = slice(None, None, 1)
    if cols is Ellipsis:
        cols = slice(None, None, 1)

    rows_sliced = isinstance(rows, slice)
    cols_sliced = isinstance(cols, slice)

    if rows_sliced and cols_sliced:
        return rows, cols
    if rows_sliced:
        # Only the columns need converting to a flat integer index.
        return rows, np.asarray(np.ix_(cols), int).ravel()
    if cols_sliced:
        return np.asarray(np.ix_(rows), int).ravel(), cols
    # Two sequences: build the open mesh so they index the cross product.
    row_ix, col_ix = np.ix_(rows, cols)
    return np.asarray(row_ix, int), np.asarray(col_ix, int)
1478