Issues in preprocess.py (master) - Issues in master - hugobuddel/orange3 - Measure and Improve Code Quality continuously with Scrutinizer

Issues (4082)

Orange/preprocess/preprocess.py (7 issues)

Labels

Severity

"""
Preprocess
----------

"""
import numpy as np
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import sklearn.preprocessing as skl_preprocessing
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import bottlechest
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

import Orange.data
from Orange.data import Table
from . import impute, discretize
from Orange.statistics import distribution

from ..misc.enum import Enum

__all__ = ["Continuize", "Discretize", "Impute", "SklImpute", "Normalize", "Randomize"]


class Preprocess:
    """
    Common base class of all preprocessors.

    A preprocessor can be created in two ways. Called without a data set,
    the constructor returns the preprocessor object, which can later be
    applied to data by calling it. Called with a data set as the first
    argument, the constructor builds the preprocessor from the remaining
    arguments, applies it to the data right away and returns the result of
    that application instead of the preprocessor.

    Parameters
    ----------
    data : a data table (default=None)
        An optional data set to be preprocessed.
    """

    def __new__(cls, data=None, *args, **kwargs):
        instance = super().__new__(cls)
        if not isinstance(data, Orange.data.Storage):
            # Ordinary construction: hand back the bare instance and let
            # Python run __init__ on it with the constructor arguments.
            return instance
        # Shortcut form: `data` is not an __init__ argument, so initialize
        # explicitly with the remaining arguments and return the outcome of
        # applying the preprocessor rather than the preprocessor itself.
        instance.__init__(*args, **kwargs)
        return instance(data)

    def __call__(self, data):
        """Apply the preprocessor to `data`; must be overridden."""
        raise NotImplementedError("Subclasses need to implement __call__")


class Continuize(Preprocess):
    """
    Preprocessor that converts a data table into the domain computed by
    ``continuize.DomainContinuizer``.

    Parameters
    ----------
    zero_based : bool (default=True)
        Forwarded unchanged to ``DomainContinuizer``.

    multinomial_treatment : MultinomialTreatment (default: Continuize.Indicators)
        Forwarded unchanged to ``DomainContinuizer``.
    """

    MultinomialTreatment = Enum(
        "Indicators", "FirstAsBase", "FrequentAsBase",
        "Remove", "RemoveMultinomial", "ReportError", "AsOrdinal",
        "AsNormalizedOrdinal", "Leave"
    )

    # Expose every enum member as a class attribute, e.g.
    # Continuize.Indicators.
    (Indicators, FirstAsBase, FrequentAsBase, Remove, RemoveMultinomial,
     ReportError, AsOrdinal, AsNormalizedOrdinal, Leave) = MultinomialTreatment

    def __init__(self, zero_based=True, multinomial_treatment=Indicators):
        self.multinomial_treatment = multinomial_treatment
        self.zero_based = zero_based

    def __call__(self, data):
        """
        Compute the continuized domain for `data` and return `data`
        converted into that domain.

        Parameters
        ----------
        data : a data table
            The data set to be continuized.
        """
        # NOTE(review): imported at call time, presumably to avoid a
        # circular import at module load - confirm before hoisting.
        from . import continuize

        build_domain = continuize.DomainContinuizer(
            multinomial_treatment=self.multinomial_treatment,
            zero_based=self.zero_based)
        return data.from_table(build_domain(data), data)


class Discretize(Preprocess):
    """
    A preprocessor that replaces continuous features with discretized
    ones.

    Parameters
    ----------
    method : discretization method (default=None)
        A callable invoked as ``method(data, variable)`` for every
        continuous attribute. When not given, ``discretize.EqualFreq()``
        is used.

    remove_const : bool (default=True)
        When true, discretized features that end up with fewer than two
        values are dropped from the resulting domain.
    """

    def __init__(self, method=None, remove_const=True):
        self.remove_const = remove_const
        self.method = method

    def __call__(self, data):
        """
        Discretize the continuous attributes of `data` and return the
        data converted to the resulting domain.

        Attributes that are not continuous are kept as they are. Class
        variables and metas are carried over unchanged.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be discretized.
        """
        discretizer = self.method or discretize.EqualFreq()

        kept = []
        for var in data.domain.attributes:
            if not var.is_continuous:
                kept.append(var)
                continue
            discretized = discretizer(data, var)
            if discretized is None:
                # The method produced no replacement for this feature.
                continue
            if len(discretized.values) >= 2 or not self.remove_const:
                kept.append(discretized)

        new_domain = Orange.data.Domain(
            kept, data.domain.class_vars, data.domain.metas)
        return data.from_table(new_domain, data)


class Impute(Preprocess):
    """
    Construct an imputer, a preprocessor for imputation of missing values in
    the data table.

    Parameters
    ----------
    method : imputation method (default=None)
        A callable invoked as ``method(data, variable)`` for every
        attribute of the domain. When None (or otherwise falsy),
        ``impute.Average()`` is used.
    """

    def __init__(self, method=None):
        # The default is resolved in __call__ rather than in the signature:
        # a default built in the signature is created once, at class
        # definition time, and then shared by every Impute instance.
        self.method = method

    def __call__(self, data):
        """
        Apply an imputation method to the given data set. Returns the data
        converted to a domain in which every attribute is replaced by the
        variable the imputation method returned for it. Class variables
        and metas are carried over unchanged.

        Parameters
        ----------
        data : Orange.data.Table
            An input data table.
        """
        method = self.method or impute.Average()
        newattrs = [method(data, var) for var in data.domain.attributes]
        domain = Orange.data.Domain(
            newattrs, data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)


class SklImpute(Preprocess):
    """
    Imputation preprocessor backed by ``sklearn.preprocessing.Imputer``.

    Parameters
    ----------
    strategy : str (default='mean')
        Passed to ``Imputer`` as its ``strategy`` argument.

    force : bool (default=True)
        If False, data whose ``X`` contains no NaN is returned unchanged
        and no imputer is fitted. If True, the imputer is always fitted
        and applied.
    """

    __wraps__ = skl_preprocessing.Imputer

    def __init__(self, strategy='mean', force=True):
        self.strategy = strategy
        self.force = force
        # Set by __call__ to the imputer fitted on the most recent data.
        self.imputer = None

    def __call__(self, data):
        """
        Fit the imputer on ``data.X`` and return a new table holding the
        transformed ``X`` together with the original ``Y`` and metas.

        Each attribute of the new domain is obtained from
        ``impute.Average()`` called with the value the fitted imputer
        reports for the corresponding column in ``statistics_``.

        Parameters
        ----------
        data : Orange.data.Table
            An input data table.
        """
        if not self.force and not np.isnan(data.X).any():
            return data
        self.imputer = skl_preprocessing.Imputer(strategy=self.strategy)
        X = self.imputer.fit_transform(data.X)
        features = [impute.Average()(data, var, value) for var, value in
                    zip(data.domain.attributes, self.imputer.statistics_)]
        domain = Orange.data.Domain(features, data.domain.class_vars,
                                    data.domain.metas)
        return Orange.data.Table(domain, X, data.Y, data.metas)


class RemoveConstant(Preprocess):
    """
    A preprocessor that drops attributes whose column holds a single
    constant value.
    """

    def __call__(self, data):
        """
        Return a table containing only those attributes of `data` whose
        column minimum and maximum (computed with ``bottlechest.nanmin``
        and ``bottlechest.nanmax``) differ. Class variables and metas are
        carried over unchanged.

        Parameters
        ----------
        data : an input data set
        """
        column_min = bottlechest.nanmin(data.X, axis=0)
        column_max = bottlechest.nanmax(data.X, axis=0)
        varies = column_min != column_max

        retained = [data.domain.attributes[idx]
                    for idx, keep in enumerate(varies) if keep]
        reduced_domain = Orange.data.Domain(
            retained, data.domain.class_vars, data.domain.metas)
        return Orange.data.Table(reduced_domain, data)


class Normalize(Preprocess):
    """
    Construct a preprocessor for normalization of features.
    Given a data table, preprocessor returns a new table in
    which the continuous attributes are normalized.

    Parameters
    ----------
    zero_based : bool (default=True)
        Determines the value used as the “low” value of the variable.
        It determines the interval for normalized continuous variables
        (either [-1, 1] or [0, 1]).

    norm_type : NormTypes (default: Normalize.NormalizeBySD)
        Normalization type. If Normalize.NormalizeBySD, the values are
        replaced with standardized values by subtracting the average
        value and dividing by the standard deviation.
        Attribute zero_based has no effect on this standardization.

        If Normalize.NormalizeBySpan, the values are replaced with
        normalized values by subtracting min value of the data and
        dividing by span (max - min).

    transform_class : bool (default=False)
        If True the class is normalized as well.

    Examples
    --------
    The normalization type has to be given by keyword: the first positional
    argument that is not a data table is bound to `zero_based`.

    >>> from Orange.data import Table
    >>> from Orange.preprocess import Normalize
    >>> data = Table("iris")
    >>> normalizer = Normalize(norm_type=Normalize.NormalizeBySpan)
    >>> normalized_data = normalizer(data)
    """

    NormTypes = Enum("NormalizeBySpan", "NormalizeBySD")
    # Expose the enum members as class attributes, e.g.
    # Normalize.NormalizeBySD.
    (NormalizeBySpan, NormalizeBySD) = NormTypes

    def __init__(self,
                 zero_based=True,
                 norm_type=NormalizeBySD,
                 transform_class=False):
        self.zero_based = zero_based
        self.norm_type = norm_type
        self.transform_class = transform_class

    def __call__(self, data):
        """
        Compute and apply normalization of the given data. Returns a new
        data table.

        The work is delegated to ``normalize.Normalizer``, configured with
        this preprocessor's settings.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be normalized.

        Returns
        -------
        data : Orange.data.Table
            Normalized data table.
        """
        # NOTE(review): imported at call time, presumably to avoid a
        # circular import at module load - confirm before hoisting.
        from . import normalize

        normalizer = normalize.Normalizer(
            zero_based=self.zero_based,
            norm_type=self.norm_type,
            transform_class=self.transform_class)
        return normalizer(data)


class Randomize(Preprocess):
    """
    Construct a preprocessor for randomization of classes,
    attributes or metas.
    Given a data table, preprocessor returns a new table in
    which the data is shuffled.

    Parameters
    ----------

    rand_type : RandTypes (default: Randomize.RandomizeClasses)
        Randomization type. If Randomize.RandomizeClasses, classes
        are shuffled.
        If Randomize.RandomizeAttributes, attributes are shuffled.
        If Randomize.RandomizeMetas, metas are shuffled.

    Examples
    --------
    >>> from Orange.data import Table
    >>> from Orange.preprocess import Randomize
    >>> data = Table("iris")
    >>> randomizer = Randomize(Randomize.RandomizeClasses)
    >>> randomized_data = randomizer(data)
    """

    RandTypes = Enum("RandomizeClasses", "RandomizeAttributes",
                     "RandomizeMetas")
    # Expose the enum members as class attributes, e.g.
    # Randomize.RandomizeClasses.
    (RandomizeClasses, RandomizeAttributes, RandomizeMetas) = RandTypes

    def __init__(self, rand_type=RandomizeClasses):
        self.rand_type = rand_type

    def __call__(self, data):
        """
        Apply randomization of the given data. Returns a new
        data table.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to be randomized.

        Returns
        -------
        data : Orange.data.Table
            Randomized data table.

        Raises
        ------
        TypeError
            If `rand_type` is not one of the RandTypes members.
        """
        # Shuffling is done in place, so operate on a new table.
        # NOTE(review): ensure_copy() presumably makes the new table own
        # its arrays so the input stays untouched - confirm in Table.
        new_data = Table(data)
        new_data.ensure_copy()

        if self.rand_type == Randomize.RandomizeClasses:
            self.randomize(new_data.Y)
        elif self.rand_type == Randomize.RandomizeAttributes:
            self.randomize(new_data.X)
        elif self.rand_type == Randomize.RandomizeMetas:
            self.randomize(new_data.metas)
        else:
            raise TypeError('Unsupported type')

        return new_data

    def randomize(self, table):
        """
        Shuffle `table` in place.

        If `table` has more than one dimension, every slice
        ``table[:, i]`` is shuffled separately, so each column receives
        its own permutation. Otherwise the array is shuffled as a whole.

        Parameters
        ----------
        table : array
            The array to shuffle.
            NOTE(review): assumed to be a numpy array, so that the column
            slices are views and the shuffle reaches `table` itself.
        """
        if len(table.shape) > 1:
            for i in range(table.shape[1]):
                np.random.shuffle(table[:, i])
        else:
            np.random.shuffle(table)


class PreprocessorList:
    """
    A sequence of preprocessors that is applied to a data set as a chain.

    Parameters
    ----------
    preprocessors : list
        The preprocessors to apply, in order. The iterable is copied into
        a new list on construction.
    """

    def __init__(self, preprocessors):
        self.preprocessors = list(preprocessors)

    def __call__(self, data):
        """
        Run `data` through every stored preprocessor in turn, feeding the
        output of each one into the next, and return the final result.

        Parameters
        ----------
        data : an input data table
        """
        result = data
        for step in self.preprocessors:
            result = step(result)
        return result



GitHub Access Token became invalid

Issues (4082)

Orange/preprocess/preprocess.py (7 issues)

Labels

Severity

Introduced By

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			"""
2			Preprocess
3			----------
4
5			"""
6			import numpy as np
			0 ignored issues – show Configuration introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
7			import sklearn.preprocessing as skl_preprocessing
			0 ignored issues – show Configuration introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The import `sklearn.preprocessing` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
8			import bottlechest
			0 ignored issues – show Configuration introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The import `bottlechest` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
9
10			import Orange.data
11			from Orange.data import Table
12			from . import impute, discretize
13			from Orange.statistics import distribution
			0 ignored issues – show Unused Code introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this Unused distribution imported from Orange.statistics Loading history...
14			from ..misc.enum import Enum
15
16			__all__ = ["Continuize", "Discretize", "Impute", "SklImpute", "Normalize", "Randomize"]
17
18
19			class Preprocess:
20			"""
21			A generic preprocessor class. All preprocessors need to inherit this
22			class. Preprocessors can be instantiated without the data set to return
23			data preprocessor, or can be given a data set to return the preprocessed
24			data.
25
26			Parameters
27			----------
28			data : a data table (default=None)
29			An optional data set to be preprocessed.
30			"""
31
32			def __new__(cls, data=None, *args, **kwargs):
33			self = super().__new__(cls)
34			if isinstance(data, Orange.data.Storage):
35			self.__init__(*args, **kwargs)
36			return self(data)
37			else:
38			return self
39
40			def __call__(self, data):
41			raise NotImplementedError("Subclasses need to implement __call__")
42
43
44			class Continuize(Preprocess):
45			MultinomialTreatment = Enum(
46			"Indicators", "FirstAsBase", "FrequentAsBase",
47			"Remove", "RemoveMultinomial", "ReportError", "AsOrdinal",
48			"AsNormalizedOrdinal", "Leave"
49			)
50
51			(Indicators, FirstAsBase, FrequentAsBase, Remove, RemoveMultinomial,
52			ReportError, AsOrdinal, AsNormalizedOrdinal, Leave) = MultinomialTreatment
53
54			def __init__(self, zero_based=True, multinomial_treatment=Indicators):
55			self.zero_based = zero_based
56			self.multinomial_treatment = multinomial_treatment
57
58			def __call__(self, data):
59			from . import continuize
60
61			continuizer = continuize.DomainContinuizer(
62			zero_based=self.zero_based,
63			multinomial_treatment=self.multinomial_treatment)
64			domain = continuizer(data)
65			return data.from_table(domain, data)
66
67
68			class Discretize(Preprocess):
69			"""
70			Construct a discretizer, a preprocessor for discretization of
71			continuous features.
72
73			Parameters
74			----------
75			method : discretization method (default: Orange.preprocess.discretize.Discretization)
76
77			remove_const : bool (default=True)
78			Determines whether the features with constant values are removed
79			during discretization.
80			"""
81
82			def __init__(self, method=None, remove_const=True):
83			self.method = method
84			self.remove_const = remove_const
85
86			def __call__(self, data):
87			"""
88			Compute and apply discretization of the given data. Returns a new
89			data table.
90
91			Parameters
92			----------
93			data : Orange.data.Table
94			A data table to be discretized.
95			"""
96
97			def transform(var):
98			if var.is_continuous:
99			new_var = method(data, var)
100			if new_var is not None and \
101			(len(new_var.values) >= 2 or not self.remove_const):
102			return new_var
103			else:
104			return None
105			else:
106			return var
107
108			method = self.method or discretize.EqualFreq()
109			attributes = [transform(var) for var in data.domain.attributes]
110			attributes = [var for var in attributes if var is not None]
111			domain = Orange.data.Domain(
112			attributes, data.domain.class_vars, data.domain.metas)
113			return data.from_table(domain, data)
114
115
116			class Impute(Preprocess):
117			"""
118			Construct a imputer, a preprocessor for imputation of missing values in
119			the data table.
120
121			Parameters
122			----------
123			method : imputation method (default: Orange.preprocess.impute.Average())
124			"""
125
126			def __init__(self, method=Orange.preprocess.impute.Average()):
127			self.method = method
128
129			def __call__(self, data):
130			"""
131			Apply an imputation method to the given data set. Returns a new
132			data table with missing values replaced by their imputations.
133
134			Parameters
135			----------
136			data : Orange.data.Table
137			An input data table.
138			"""
139
140			method = self.method or impute.Average()
141			newattrs = [method(data, var) for var in data.domain.attributes]
142			domain = Orange.data.Domain(
143			newattrs, data.domain.class_vars, data.domain.metas)
144			return data.from_table(domain, data)
145
146
147			class SklImpute(Preprocess):
148			__wraps__ = skl_preprocessing.Imputer
149
150			def __init__(self, strategy='mean', force=True):
151			self.strategy = strategy
152			self.force = force
153
154			def __call__(self, data):
155			if not self.force and not np.isnan(data.X).any():
156			return data
157			self.imputer = skl_preprocessing.Imputer(strategy=self.strategy)
			0 ignored issues – show Coding Style introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this The attribute `imputer` was defined outside `__init__`. It is generally a good practice to initialize all attributes to default values in the `__init__` method: class Foo: def __init__(self, x=None): self.x = x Loading history...
158			X = self.imputer.fit_transform(data.X)
159			features = [impute.Average()(data, var, value) for var, value in
160			zip(data.domain.attributes, self.imputer.statistics_)]
161			domain = Orange.data.Domain(features, data.domain.class_vars,
162			data.domain.metas)
163			return Orange.data.Table(domain, X, data.Y, data.metas)
164
165
166			class RemoveConstant(Preprocess):
167			"""
168			Construct a preprocessor that removes features with constant values
169			from the data set.
170			"""
171
172			def __call__(self, data):
173			"""
174			Remove columns with constant values from the data set and return
175			the resulting data table.
176
177			Parameters
178			----------
179			data : an input data set
180			"""
181
182			oks = bottlechest.nanmin(data.X, axis=0) != \
183			bottlechest.nanmax(data.X, axis=0)
184			atts = [data.domain.attributes[i] for i, ok in enumerate(oks) if ok]
185			domain = Orange.data.Domain(atts, data.domain.class_vars,
186			data.domain.metas)
187			return Orange.data.Table(domain, data)
188
189
190			class Normalize(Preprocess):
191			"""
192			Construct a preprocessor for normalization of features.
193			Given a data table, preprocessor returns a new table in
194			which the continuous attributes are normalized.
195
196			Parameters
197			----------
198			zero_based : bool (default=True)
199			Determines the value used as the “low” value of the variable.
200			It determines the interval for normalized continuous variables
201			(either [-1, 1] or [0, 1]).
202
203			norm_type : NormTypes (default: Normalize.NormalizeBySD)
204			Normalization type. If Normalize.NormalizeBySD, the values are
205			replaced with standardized values by subtracting the average
206			value and dividing by the standard deviation.
207			Attribute zero_based has no effect on this standardization.
208
209			If Normalize.NormalizeBySpan, the values are replaced with
210			normalized values by subtracting min value of the data and
211			dividing by span (max - min).
212
213			transform_class : bool (default=False)
214			If True the class is normalized as well.
215
216			Examples
217			--------
218			>>> from Orange.data import Table
219			>>> from Orange.preprocess import Normalize
220			>>> data = Table("iris")
221			>>> normalizer = Normalize(Normalize.NormalizeBySpan)
222			>>> normalized_data = normalizer(data)
223			"""
224
225			NormTypes = Enum("NormalizeBySpan", "NormalizeBySD")
226			(NormalizeBySpan, NormalizeBySD) = NormTypes
			0 ignored issues – show Bug Best Practice introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this Attempting to unpack a non-sequence Loading history...
227
228			def __init__(self,
229			zero_based=True,
230			norm_type=NormalizeBySD,
231			transform_class=False):
232			self.zero_based = zero_based
233			self.norm_type = norm_type
234			self.transform_class = transform_class
235
236			def __call__(self, data):
237			"""
238			Compute and apply normalization of the given data. Returns a new
239			data table.
240
241			Parameters
242			----------
243			data : Orange.data.Table
244			A data table to be normalized.
245
246			Returns
247			-------
248			data : Orange.data.Table
249			Normalized data table.
250			"""
251			from . import normalize
252
253			normalizer = normalize.Normalizer(
254			zero_based=self.zero_based,
255			norm_type=self.norm_type,
256			transform_class=self.transform_class)
257			return normalizer(data)
258
259
260			class Randomize(Preprocess):
261			"""
262			Construct a preprocessor for randomization of classes,
263			attributes or metas.
264			Given a data table, preprocessor returns a new table in
265			which the data is shuffled.
266
267			Parameters
268			----------
269
270			rand_type : RandTypes (default: Randomize.RandomizeClasses)
271			Randomization type. If Randomize.RandomizeClasses, classes
272			are shuffled.
273			If Randomize.RandomizeAttributes, attributes are shuffled.
274			If Randomize.RandomizeMetas, metas are shuffled.
275
276			Examples
277			--------
278			>>> from Orange.data import Table
279			>>> from Orange.preprocess import Randomize
280			>>> data = Table("iris")
281			>>> randomizer = Randomize(Randomize.RandomizeClasses)
282			>>> randomized_data = randomizer(data)
283			"""
284
285			RandTypes = Enum("RandomizeClasses", "RandomizeAttributes",
286			"RandomizeMetas")
287			(RandomizeClasses, RandomizeAttributes, RandomizeMetas) = RandTypes
288
289			def __init__(self, rand_type=RandomizeClasses):
290			self.rand_type = rand_type
291
292			def __call__(self, data):
293			"""
294			Apply randomization of the given data. Returns a new
295			data table.
296
297			Parameters
298			----------
299			data : Orange.data.Table
300			A data table to be randomized.
301
302			Returns
303			-------
304			data : Orange.data.Table
305			Randomized data table.
306			"""
307			new_data = Table(data)
308			new_data.ensure_copy()
309
310			if self.rand_type == Randomize.RandomizeClasses:
311			self.randomize(new_data.Y)
312			elif self.rand_type == Randomize.RandomizeAttributes:
313			self.randomize(new_data.X)
314			elif self.rand_type == Randomize.RandomizeMetas:
315			self.randomize(new_data.metas)
316			else:
317			raise TypeError('Unsupported type')
318
319			return new_data
320
321			def randomize(self, table):
			0 ignored issues – show Coding Style introduced 2015-12-02 09:15 UTC by Report Bug Copy Issue Report Show Similar Issues like this This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
322			if len(table.shape) > 1:
323			for i in range(table.shape[1]):
324			np.random.shuffle(table[:,i])
325			else:
326			np.random.shuffle(table)
327
328
329			class PreprocessorList:
330			"""
331			Store a list of preprocessors and on call apply them to the data set.
332
333			Parameters
334			----------
335			preprocessors : list
336			A list of preprocessors.
337			"""
338
339			def __init__(self, preprocessors):
340			self.preprocessors = list(preprocessors)
341
342			def __call__(self, data):
343			"""
344			Applies a list of preprocessors to the data set.
345
346			Parameters
347			----------
348			data : an input data table
349			"""
350
351			for pp in self.preprocessors:
352			data = pp(data)
353			return data
354
355

hugobuddel / orange3

GitHub Access Token became invalid

Issues (4082)

Orange/preprocess/preprocess.py (7 issues)

Labels

Severity

Introduced By

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files