1 | from Orange.data import ContinuousVariable, Domain |
||
2 | from Orange.statistics import distribution |
||
3 | from .transformation import Identity, Indicator, Indicator1, Normalizer |
||
4 | from .preprocess import Continuize |
||
5 | |||
# Names exported by ``from <this module> import *``.
__all__ = [
    "DomainContinuizer",
    "MultinomialTreatment",
]
||
7 | |||
8 | |||
class DomainContinuizer:
    """Build a domain whose discrete variables are replaced by continuous ones.

    The instance stores three settings:

    * ``zero_based`` -- selects ``Indicator`` (truthy) or ``Indicator1``
      (falsy) for indicator columns, and picks between the two
      ``Normalizer`` parameterisations used for normalized ordinals.
    * ``multinomial_treatment`` -- one of the ``Continuize`` treatment
      constants; decides how each discrete variable is converted.
    * ``transform_class`` -- when true, class variables are converted the
      same way as attributes; otherwise they are passed through unchanged.

    Calling the class with ``data`` is a shortcut for constructing the
    continuizer and immediately applying it to ``data``.
    """

    def __new__(cls, data=None, zero_based=True,
                multinomial_treatment=Continuize.Indicators,
                transform_class=False):
        """Create a continuizer, and apply it at once if ``data`` is given.

        Returns the new instance when ``data`` is ``None``; otherwise
        returns the result of calling the instance on ``data``.
        """
        self = super().__new__(cls)
        self.zero_based = zero_based
        self.multinomial_treatment = multinomial_treatment
        self.transform_class = transform_class

        return self if data is None else self(data)

    def __call__(self, data):
        """Return a new ``Domain`` with discrete variables continuized.

        ``data`` is either a ``Domain`` or an object with a ``domain``
        attribute. Metas are copied over untouched.

        Raises:
            ValueError: the treatment is ``Continuize.ReportError`` and some
                discrete variable has more than two values.
            TypeError: the treatment is ``Continuize.FrequentAsBase``, the
                domain has discrete attributes, and only a ``Domain`` (no
                data to compute distributions from) was supplied.
        """
        treat = self.multinomial_treatment
        transform_class = self.transform_class
        domain = data if isinstance(data, Domain) else data.domain

        if (treat == Continuize.ReportError and
                any(var.is_discrete and len(var.values) > 2
                    for var in domain)):
            raise ValueError("data has multinomial attributes")

        # Distributions are only needed to find the most frequent value.
        needs_dists = (treat == Continuize.FrequentAsBase and
                       domain.has_discrete_attributes(transform_class))
        dists = None
        if needs_dists:
            if isinstance(data, Domain):
                raise TypeError("continuizer requires data")
            # Same positional flags as before (False, True); presumably
            # "skip discrete" / "skip continuous" -- TODO confirm against
            # distribution.get_distributions.
            dists = distribution.get_distributions(data, False, True)

        # Index into ``dists`` of the discrete variable being processed.
        var_ptr = 0

        def transform_discrete(var):
            """Return the list of continuous variables replacing ``var``."""
            n_values = len(var.values)
            if (n_values < 2 or
                    treat == Continuize.Remove or
                    (treat == Continuize.RemoveMultinomial and
                     n_values > 2)):
                return []

            if treat == Continuize.AsOrdinal:
                return [ContinuousVariable(var.name,
                                           compute_value=Identity(var))]

            if treat == Continuize.AsNormalizedOrdinal:
                # The guard above guarantees n_values >= 2, so span >= 1
                # and the divisions below are safe.
                span = n_values - 1
                if self.zero_based:
                    normalizer = Normalizer(var, 0, 1 / span)
                else:
                    normalizer = Normalizer(var, span / 2, 2 / span)
                return [ContinuousVariable(var.name,
                                           compute_value=normalizer)]

            if treat == Continuize.Indicators:
                # No value is skipped: one indicator per value.
                base = -1
            elif treat in (Continuize.FirstAsBase,
                           Continuize.RemoveMultinomial,
                           Continuize.ReportError):
                # ReportError gets here only for binary variables
                # (multinomial ones raised above). It used to fall through
                # to the ``dists`` lookup below, which is never computed for
                # that treatment and failed with a NameError.
                # NOTE(review): using the variable's base value here is a
                # judgement call -- confirm the intended encoding.
                base = max(var.base_value, 0)
            else:
                base = dists[var_ptr].modus()

            ind_class = Indicator if self.zero_based else Indicator1
            return [
                ContinuousVariable("{}={}".format(var.name, val),
                                   compute_value=ind_class(var, i))
                for i, val in enumerate(var.values)
                if i != base
            ]

        def transform_list(variables):
            """Continuize the discrete variables, keeping others as they are."""
            nonlocal var_ptr
            converted = []
            for var in variables:
                if var.is_discrete:
                    converted += transform_discrete(var)
                    # Advance even when the variable was dropped, so the
                    # pointer stays aligned with the distributions list.
                    if needs_dists:
                        var_ptr += 1
                else:
                    converted.append(var)
            return converted

        new_attrs = transform_list(domain.attributes)
        if transform_class:
            new_classes = transform_list(domain.class_vars)
        else:
            new_classes = domain.class_vars
        return Domain(new_attrs, new_classes, domain.metas)
||
94 | |||
95 | |||
# Module-level alias so callers can import the treatment constants from here.
MultinomialTreatment = Continuize.MultinomialTreatment
||
97 |
This check looks for lines that are too long. You can specify the maximum line length.