1 | import inspect |
||
2 | |||
3 | import Orange.data |
||
4 | from Orange.misc.wrapper_meta import WrapperMeta |
||
5 | from Orange.misc.cache import single_cache |
||
6 | import Orange.preprocess |
||
7 | |||
8 | __all__ = ["Projector", "Projection", "SklProjector"] |
||
9 | |||
10 | |||
class Projector:
    """Base class for projectors: preprocess data, fit, and tag the result.

    Subclasses must override :meth:`fit`. Calling an instance on a data
    object runs the configured preprocessors, fits on ``data.X`` and
    ``data.Y``, and returns the fitted object annotated with the
    preprocessed domain (``pre_domain``) and the projector's ``name``.
    """

    name = 'projection'

    #: A sequence of data preprocessors to apply on data prior to projecting
    preprocessors = ()

    def __init__(self, preprocessors=None):
        """Store the preprocessors to use.

        :param preprocessors: iterable of callables applied to the data in
            order; when ``None`` the class-level ``preprocessors`` is used.
        """
        if preprocessors is None:
            preprocessors = type(self).preprocessors
        self.preprocessors = tuple(preprocessors)
        # Domain of the most recently preprocessed data. Defined here so the
        # attribute exists before the first call; ``__call__`` overwrites it.
        self.domain = None

    def fit(self, X, Y=None):
        """Fit the projection; must be overridden by subclasses.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            "Classes derived from Projector must overload method fit")

    def __call__(self, data):
        """Preprocess ``data``, fit on it and return the annotated result.

        :param data: object exposing ``X``, ``Y`` and ``domain``.
        :return: whatever :meth:`fit` returns, with ``pre_domain`` set to the
            domain of the preprocessed data and ``name`` set to ``self.name``.
        """
        data = self.preprocess(data)
        self.domain = data.domain
        model = self.fit(data.X, data.Y)
        model.pre_domain = self.domain
        model.name = self.name
        return model

    def preprocess(self, data):
        """Apply every preprocessor to ``data`` in order and return the result."""
        for pp in self.preprocessors:
            data = pp(data)
        return data
||
37 | |||
38 | |||
class Projection:
    """Thin wrapper around a fitted projection object.

    The wrapped object's instance attributes are copied onto the wrapper, and
    the object itself is kept as ``self.proj``.
    """

    def __init__(self, proj):
        """Copy ``proj``'s instance attributes and keep a reference to it."""
        fitted_state = proj.__dict__
        self.__dict__.update(fitted_state)
        self.proj = proj

    # NOTE(review): ``single_cache`` is defined in Orange.misc.cache and not
    # visible here; presumably it memoises the latest result -- confirm.
    @single_cache
    def transform(self, X):
        """Delegate to the wrapped object's ``transform``."""
        wrapped = self.proj
        return wrapped.transform(X)

    def __call__(self, data):
        """Return ``data.from_table(self.domain, data)``.

        ``self.domain`` is not assigned in this class; it has to come from
        the copied attributes of ``proj`` or from a subclass.
        """
        build_table = data.from_table
        return build_table(self.domain, data)

    def __repr__(self):
        """Use the ``name`` attribute as the textual representation."""
        return self.name
||
53 | |||
54 | |||
class SklProjector(Projector, metaclass=WrapperMeta):
    """Projector that delegates fitting to the class named by ``__wraps__``.

    Subclasses set ``__wraps__`` to the wrapped (scikit-learn) class and
    assign ``params``; only the entries matching the wrapped constructor's
    argument names are retained and passed on when fitting.
    """

    __wraps__ = None
    _params = {}
    name = 'skl projection'
    preprocessors = [Orange.preprocess.Continuize(),
                     Orange.preprocess.SklImpute(force=False)]

    @property
    def params(self):
        """Constructor arguments that will be passed to the wrapped class."""
        return self._params

    @params.setter
    def params(self, value):
        # Keep only the entries the wrapped constructor accepts.
        self._params = self._get_sklparams(value)

    # NOTE(review): a code-analysis annotation in the source listing reports
    # this method as duplicated elsewhere in the project -- confirm, and
    # consider sharing a single implementation.
    def _get_sklparams(self, values):
        """Select from ``values`` the arguments of the wrapped constructor.

        :param values: mapping of candidate argument names to values.
        :return: dict with only the names found in the wrapped ``__init__``.
        :raises TypeError: if ``__wraps__`` is not defined on the class.
        """
        wrapped = self.__wraps__
        if wrapped is None:
            raise TypeError("Wrapper does not define '__wraps__'")
        arg_names = inspect.getargs(wrapped.__init__.__code__).args
        # first argument is 'self'
        assert arg_names[0] == "self"
        accepted = {}
        for arg in arg_names[1:]:
            if arg in values:
                accepted[arg] = values[arg]
        return accepted

    def preprocess(self, data):
        """Run the base preprocessing, then reject multinomial attributes.

        :raises ValueError: if any attribute of the preprocessed domain is
            discrete with more than two values.
        """
        data = super().preprocess(data)
        for var in data.domain.attributes:
            if var.is_discrete and len(var.values) > 2:
                raise ValueError("Wrapped scikit-learn methods do not support "
                                 "multinomial variables.")
        return data

    def fit(self, X, Y=None):
        """Instantiate the wrapped class with ``params`` and fit it."""
        estimator = self.__wraps__(**self.params)
        return estimator.fit(X, Y)

    def __repr__(self):
        """Return the name followed by the stored parameters."""
        return '{name} {params}'.format(name=self.name, params=self.params)
||
96 |
It is generally a good practice to initialize all attributes to default values in the `__init__` method: