GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

Issues (4082)

Orange/data/domain.py (20 issues)

1
from math import log
2
from collections import Iterable
3
from itertools import chain
4
from numbers import Integral
5
6
import weakref
7
from .variable import *
0 ignored issues
show
The usage of wildcard imports like variable should generally be avoided.
Loading history...
floor was imported with wildcard, but is not used.
Loading history...
VariableMeta was imported with wildcard, but is not used.
Loading history...
Value was imported with wildcard, but is not used.
Loading history...
make_variable was imported with wildcard, but is not used.
Loading history...
Real was imported with wildcard, but is not used.
Loading history...
isnan was imported with wildcard, but is not used.
Loading history...
ValueUnknown was imported with wildcard, but is not used.
Loading history...
PickleError was imported with wildcard, but is not used.
Loading history...
collections was imported with wildcard, but is not used.
Loading history...
8
import numpy as np
0 ignored issues
show
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
9
10
11
class DomainConversion:
    """
    Indices and functions for conversion between domains.

    Every list contains indices (instances of int) of variables in the
    source domain, or the variable's compute_value function if the source
    domain does not contain the variable.

    .. attribute:: source

        The source domain. The destination is not stored since destination
        domain is the one which contains the instance of DomainConversion.

    .. attribute:: attributes

        Indices for attribute values.

    .. attribute:: class_vars

        Indices for class variables

    .. attribute:: variables

        Indices for attributes and class variables
        (:obj:`attributes`+:obj:`class_vars`).

    .. attribute:: metas

        Indices for meta attributes
    """

    def __init__(self, source, destination):
        """
        Compute the conversion indices from the given `source` to `destination`

        :param source: the domain the data comes from; must support `in`
            and provide an `index` method
        :param destination: the domain the data is converted to; its
            `attributes`, `class_vars` and `metas` are read
        """
        self.source = source

        self.attributes = self._resolve(source, destination.attributes)
        self.class_vars = self._resolve(source, destination.class_vars)
        self.variables = self.attributes + self.class_vars
        self.metas = self._resolve(source, destination.metas)

    @staticmethod
    def _resolve(source, variables):
        """
        Map each variable to its index in `source`, or to the variable's
        `compute_value` attribute when `source` does not contain it.
        """
        return [source.index(var) if var in source else var.compute_value
                for var in variables]
58
59
60
class Domain:
    """
    Description of a data domain: features (`attributes`), target
    variables (`class_vars`) and meta attributes (`metas`).

    Variables can be looked up by descriptor, name or index. Features and
    class variables have non-negative indices; the meta attribute at
    position `k` has index `-1 - k`.
    """

    def __init__(self, attributes, class_vars=None, metas=None, source=None):
        """
        Initialize a new domain descriptor. Arguments give the features and
        the class attribute(s). They can be described by descriptors (instances
        of :class:`Variable`), or by indices or names if the source domain is
        given.

        :param attributes: a list of attributes
        :type attributes: list of :class:`Variable`
        :param class_vars: target variable or a list of target variables
        :type class_vars: :class:`Variable` or list of :class:`Variable`
        :param metas: a list of meta attributes
        :type metas: list of :class:`Variable`
        :param source: the source domain for attributes
        :type source: Orange.data.Domain
        :return: a new domain
        :rtype: :class:`Domain`
        :raises TypeError: if an element is not a :class:`Variable` and
            cannot be resolved through `source`, or if a feature or class
            variable is not primitive
        """
        if class_vars is None:
            class_vars = []
        elif isinstance(class_vars, (list, tuple)):
            # Plain sequences are copied so the caller's object is never
            # modified below.
            class_vars = list(class_vars)
        elif isinstance(class_vars, (Variable, Integral, str)):
            # A single descriptor, index or name.
            class_vars = [class_vars]
        elif isinstance(class_vars, Iterable):
            class_vars = list(class_vars)

        # Always work on copies: names and indices are replaced in place
        # below and that must not leak into the caller's lists.
        attributes = list(attributes)
        metas = [] if metas is None else list(metas)

        # Replace str's and int's with descriptors if 'source' is given;
        # complain otherwise.
        # `source is not None` rather than truthiness: a domain defines
        # __len__, so a source with only meta attributes would be falsy.
        for lst in (attributes, class_vars, metas):
            for i, var in enumerate(lst):
                if isinstance(var, Variable):
                    continue
                if source is not None and isinstance(var, (str, Integral)):
                    lst[i] = source[var]
                else:
                    raise TypeError(
                        "descriptors must be instances of Variable, "
                        "not '%s'" % type(var).__name__)

        # Store everything
        self.attributes = tuple(attributes)
        self.class_vars = tuple(class_vars)
        self._variables = self.attributes + self.class_vars
        self._metas = tuple(metas)
        # `class_var` is only defined when there is exactly one target.
        self.class_var = \
            self.class_vars[0] if len(self.class_vars) == 1 else None
        if not all(var.is_primitive() for var in self._variables):
            raise TypeError("variables must be primitive")

        # Lookup table: descriptor, name and index all map to the index.
        self._indices = dict(chain.from_iterable(
            ((var, idx), (var.name, idx), (idx, idx))
            for idx, var in enumerate(self._variables)))
        # Meta attributes use negative indices: -1, -2, ...
        self._indices.update(chain.from_iterable(
            ((var, -1-idx), (var.name, -1-idx), (-1-idx, -1-idx))
            for idx, var in enumerate(self.metas)))

        self.anonymous = False
        # Cache of conversions, keyed weakly by the source domain.
        self._known_domains = weakref.WeakKeyDictionary()
        self._last_conversion = None

    # noinspection PyPep8Naming
    @classmethod
    def from_numpy(cls, X, Y=None, metas=None):
        """
        Create a domain corresponding to the given numpy arrays. This method
        is usually invoked from :meth:`Orange.data.Table.from_numpy`.

        All attributes are assumed to be continuous and are named
        "Feature <n>". Target variables are discrete if all values are
        integers between 0 and 19; otherwise they are continuous. Discrete
        targets are named "Class <n>" and continuous are named "Target <n>".
        Domain is marked as :attr:`anonymous`, so data from any other domain of
        the same shape can be converted into this one and vice-versa.

        :param `numpy.ndarray` X: 2-dimensional array with data
        :param Y: 1- or 2-dimensional data for target
        :type Y: `numpy.ndarray` or None
        :param `numpy.ndarray` metas: meta attributes
        :type metas: `numpy.ndarray` or None
        :return: a new domain
        :rtype: :class:`Domain`
        :raises ValueError: if `X` is not 2-dimensional or `Y` is neither
            1- nor 2-dimensional
        """
        def get_places(max_index):
            # Number of digits used for numbering; 0 means "no number".
            # Counting digits of the integer is exact, whereas
            # int(log(n, 10)) + 1 fails for n == 0 and is off by one for
            # some powers of ten (e.g. 1000) due to floating point.
            return 0 if max_index <= 1 else len(str(max_index))

        def get_name(base, index, places):
            # `index` is zero-based; the generated number is one-based.
            return base if not places \
                else "{} {:0{}}".format(base, index + 1, places)

        if X.ndim != 2:
            raise ValueError('X must be a 2-dimensional array')
        n_attrs = X.shape[1]
        places = get_places(n_attrs)
        attr_vars = [ContinuousVariable(name=get_name("Feature", a, places))
                     for a in range(n_attrs)]

        class_vars = []
        if Y is not None:
            if Y.ndim == 1:
                Y = Y.reshape(len(Y), 1)
            elif Y.ndim != 2:
                raise ValueError('Y has invalid shape')
            n_classes = Y.shape[1]
            places = get_places(n_classes)
            for i, class_ in enumerate(Y.T):
                discrete = False
                mx = None
                # np.min/np.max raise on zero-size input, so a column
                # without rows is treated as continuous.
                if class_.size:
                    mn, mx = np.min(class_), np.max(class_)
                    if 0 <= mn <= mx <= 20:
                        discrete = all(int(x) == x and 0 <= x <= 19
                                       for x in np.unique(class_))
                if discrete:
                    mx = int(mx)
                    val_places = 1 + (mx >= 10)
                    values = ["v%*i" % (val_places, v + 1)
                              for v in range(mx + 1)]
                    name = get_name("Class", i, places)
                    class_vars.append(DiscreteVariable(name, values))
                else:
                    # NOTE(review): get_name already adds 1 to the index,
                    # so with several targets the numbering here starts
                    # at 2, unlike "Class"/"Feature"/"Meta". Kept as is to
                    # preserve existing names - confirm whether intended.
                    class_vars.append(
                        ContinuousVariable(
                            name=get_name("Target", i + 1, places)))

        if metas is not None:
            n_metas = metas.shape[1]
            places = get_places(n_metas)
            meta_vars = [StringVariable(get_name("Meta", m, places))
                         for m in range(n_metas)]
        else:
            meta_vars = []

        domain = cls(attr_vars, class_vars, meta_vars)
        domain.anonymous = True
        return domain

    @property
    def variables(self):
        """Tuple of features followed by class variables."""
        return self._variables

    @property
    def metas(self):
        """Tuple of meta attributes."""
        return self._metas

    def __len__(self):
        """The number of variables (features and class attributes)."""
        return len(self._variables)

    def __getitem__(self, idx):
        """
        Return a variable descriptor from the given argument, which can be
        a descriptor, index or name. If `idx` is a descriptor, the function
        returns this same object. A slice returns the corresponding part of
        the tuple of features and class variables.

        :param idx: index, name or descriptor
        :type idx: int, str or :class:`Variable`
        :return: an instance of :class:`Variable` described by `idx`
        :rtype: :class:`Variable`
        :raises KeyError: if `idx` is not in the domain
        """
        if isinstance(idx, slice):
            return self._variables[idx]

        idx = self._indices[idx]
        if idx >= 0:
            return self.variables[idx]
        # Negative indices address meta attributes: -1 -> 0, -2 -> 1, ...
        return self.metas[-1-idx]

    def __contains__(self, item):
        """
        Return `True` if the item (`str`, `int`, :class:`Variable`) is
        in the domain.
        """
        return item in self._indices

    def __iter__(self):
        """
        Return an iterator through variables (features and class attributes).
        """
        return iter(self._variables)

    def __str__(self):
        """
        Return a list-like string with the domain's features, class attributes
        and meta attributes.
        """
        inner = ", ".join(attr.name for attr in self.attributes)
        if self.class_vars:
            inner += " | " + ", ".join(var.name for var in self.class_vars)
        text = "[" + inner + "]"
        if self._metas:
            text += " {" + ", ".join(meta.name for meta in self._metas) + "}"
        return text

    __repr__ = __str__

    def __getstate__(self):
        """Return the instance state without the weak conversion cache."""
        state = self.__dict__.copy()
        state.pop("_known_domains", None)
        return state

    def __setstate__(self, state):
        """Restore the state and start with an empty conversion cache."""
        self.__dict__.update(state)
        self._known_domains = weakref.WeakKeyDictionary()

    def index(self, var):
        """
        Return the index of the given variable or meta attribute, represented
        with an instance of :class:`Variable`, `int` or `str`.

        :raises ValueError: if `var` is not in the domain
        """
        try:
            return self._indices[var]
        except KeyError:
            # (var,) keeps the formatting correct if `var` is a tuple.
            raise ValueError("'%s' is not in domain" % (var,))

    def has_discrete_attributes(self, include_class=False):
        """
        Return `True` if domain has any discrete attributes. If `include_class`
        is set, the check includes the class attribute(s).
        """
        candidates = self.variables if include_class else self.attributes
        return any(var.is_discrete for var in candidates)

    def has_continuous_attributes(self, include_class=False):
        """
        Return `True` if domain has any continuous attributes. If
        `include_class` is set, the check includes the class attribute(s).
        """
        candidates = self.variables if include_class else self.attributes
        return any(var.is_continuous for var in candidates)

    @property
    def has_continuous_class(self):
        """`True` if there is a single class variable and it is continuous."""
        # bool() so the result is always a boolean, as in has_discrete_class.
        return bool(self.class_var and self.class_var.is_continuous)

    @property
    def has_discrete_class(self):
        """`True` if there is a single class variable and it is discrete."""
        return bool(self.class_var and self.class_var.is_discrete)

    def get_conversion(self, source):
        """
        Return an instance of :class:`DomainConversion` for conversion from the
        given source domain to this domain. Domain conversions are cached to
        speed-up the conversion in the common case in which the domain
        is based on another domain, for instance, when the domain contains
        discretized variables from another domain.

        :param source: the source domain
        :type source: Orange.data.Domain
        """
        # The original author notes that the method is thread-safe.
        # NOTE(review): no locking is visible here - confirm.
        c = self._last_conversion
        if c and c.source is source:
            return c
        c = self._known_domains.get(source, None)
        if not c:
            c = DomainConversion(source, self)
            self._known_domains[source] = self._last_conversion = c
        return c

    # noinspection PyProtectedMember
    def convert(self, inst):
        """
        Convert a data instance from another domain to this domain.

        :param inst: The data instance to be converted; either an
            `Instance` or a sequence of values for the variables,
            optionally followed by values for the meta attributes
        :return: The data instance in this domain, as a tuple of three
            numpy arrays: attribute values, class values and metas
        :raises ValueError: if a plain sequence has a length that matches
            neither the variables nor the variables plus metas
        """
        from .instance import Instance

        if isinstance(inst, Instance):
            if inst.domain == self:
                return inst._x, inst._y, inst._metas
            conversion = self.get_conversion(inst.domain)
            n_src_attrs = len(inst.domain.attributes)

            def fetch(ref):
                # `ref` is a source index, a compute_value callable, or a
                # false value (no way to compute the value).
                if not isinstance(ref, int):
                    return Unknown if not ref else ref(inst)
                if 0 <= ref < n_src_attrs:
                    return inst._x[ref]
                if ref >= n_src_attrs:
                    return inst._y[ref - n_src_attrs]
                # Negative indices address the source's meta attributes.
                return inst._metas[-ref - 1]

            values = [fetch(ref) for ref in conversion.variables]
            metas = [fetch(ref) for ref in conversion.metas]
        else:
            nvars = len(self._variables)
            nmetas = len(self._metas)
            if len(inst) != nvars and len(inst) != nvars + nmetas:
                raise ValueError("invalid data length for domain")
            values = [var.to_val(val)
                      for var, val in zip(self._variables, inst)]
            if len(inst) == nvars + nmetas:
                metas = [var.to_val(val)
                         for var, val in zip(self._metas, inst[nvars:])]
            else:
                metas = [var.Unknown for var in self._metas]
        nattrs = len(self.attributes)
        # Let np.array decide dtype for values
        return np.array(values[:nattrs]), np.array(values[nattrs:]),\
               np.array(metas, dtype=object)

    def select_columns(self, col_idx):
        """
        Return a domain with the variables selected by `col_idx`, or this
        domain itself if the selection is recognized as "everything".

        Selected variables stay in the part they came from: features remain
        features, class variables remain class variables and meta attributes
        remain metas.
        """
        attributes, col_indices = self._compute_col_indices(col_idx)
        if attributes is None:
            return self

        n_attrs = len(self.attributes)
        r_attrs = [attributes[i]
                   for i, col in enumerate(col_indices)
                   if 0 <= col < n_attrs]
        r_classes = [attributes[i]
                     for i, col in enumerate(col_indices)
                     if col >= n_attrs]
        r_metas = [attributes[i]
                   for i, col in enumerate(col_indices) if col < 0]
        return Domain(r_attrs, r_classes, r_metas)

    def _compute_col_indices(self, col_idx):
        """
        Translate a column selector into `(variables, indices)`.

        `col_idx` can be `...`, a boolean numpy array, a slice, an iterable
        of descriptors/names/indices, or a single descriptor/name/index.
        Returns `(None, None)` when the selector is recognized as selecting
        everything.
        """
        if col_idx is ...:
            return None, None
        if isinstance(col_idx, np.ndarray) and col_idx.dtype == bool:
            # NOTE(review): np.nonzero returns a tuple of arrays, not an
            # array of indices; kept unchanged since callers outside this
            # class may rely on it - confirm.
            return ([attr for attr, c in zip(self, col_idx) if c],
                    np.nonzero(col_idx))
        elif isinstance(col_idx, slice):
            s = len(self.variables)
            start, end, stride = col_idx.indices(s)
            if (start, end, stride) == (0, s, 1):
                return None, None
            else:
                return (self.variables[col_idx],
                        np.arange(start, end, stride))
        elif isinstance(col_idx, Iterable) and not isinstance(col_idx, str):
            attributes = [self[col] for col in col_idx]
            # NOTE(review): `attributes` is a list and `self.attributes` a
            # tuple, so this comparison looks like it is never true.
            # Behavior is kept unchanged - confirm the intent.
            if attributes == self.attributes:
                return None, None
            return attributes, np.fromiter(
                (self.index(attr) for attr in attributes), int)
        elif isinstance(col_idx, Integral):
            attr = self[col_idx]
        else:
            attr = self[col_idx]
            col_idx = self.index(attr)
        return [attr], np.array([col_idx])

    def checksum(self):
        """Return the hash of the domain."""
        return hash(self)

    def __eq__(self, other):
        """
        Domains are equal if they have equal features, class variables and
        meta attributes.
        """
        if not isinstance(other, Domain):
            return False

        return (self.attributes == other.attributes and
                self.class_vars == other.class_vars and
                self.metas == other.metas)

    def __hash__(self):
        """Hash consistent with :meth:`__eq__`."""
        return hash(self.attributes) ^ hash(self.class_vars) ^ hash(self.metas)
422