1
|
|
|
import sklearn.decomposition as skl_decomposition |
|
|
|
|
2
|
|
|
|
3
|
|
|
try:
    # Optional integration: when the remote add-on is importable it supplies
    # the real abort/checkpoint hooks.
    from orangecontrib.remote import aborted, save_state
except ImportError:
    # Fallback stubs so the rest of the module works without the add-on.
    def aborted():
        """Stub abort check: always report that nothing was aborted."""
        return False

    def save_state(_):
        """Stub checkpoint hook: accept one argument and do nothing."""
11
|
|
|
|
12
|
|
|
import Orange.data |
13
|
|
|
from Orange.misc.wrapper_meta import WrapperMeta |
14
|
|
|
from Orange.preprocess import Continuize |
15
|
|
|
from Orange.projection import SklProjector, Projection |
16
|
|
|
|
17
|
|
|
__all__ = ["PCA", "SparsePCA", "RandomizedPCA", "IncrementalPCA"] |
18
|
|
|
|
19
|
|
|
|
20
|
|
|
# Projector wrapping ``sklearn.decomposition.PCA``.
# NOTE(review): documented with comments rather than a class docstring so
# that the class ``__doc__`` stays unset, exactly as before; the metaclass
# behind SklProjector may derive it from ``__wraps__`` -- confirm.
class PCA(SklProjector):
    __wraps__ = skl_decomposition.PCA
    name = 'pca'

    def __init__(self, n_components=None, copy=True, whiten=False, preprocessors=None):
        """Store the constructor arguments for the wrapped estimator."""
        super().__init__(preprocessors=preprocessors)
        # vars() with no argument is the local namespace, i.e. the arguments
        # above (plus ``self``). Do not introduce locals before this line.
        # NOTE(review): presumably SklProjector filters this mapping down to
        # what ``__wraps__`` accepts, since fit() unpacks it directly.
        self.params = vars()

    def fit(self, X, Y=None):
        """Fit the wrapped estimator on ``X`` and wrap it in a PCAModel."""
        fitted = self.__wraps__(**self.params).fit(X, Y)
        return PCAModel(fitted, self.domain)
32
|
|
|
|
33
|
|
|
|
34
|
|
|
# Projector wrapping ``sklearn.decomposition.SparsePCA``.
# NOTE(review): documented with comments rather than a class docstring so
# that the class ``__doc__`` stays unset, exactly as before; the metaclass
# behind SklProjector may derive it from ``__wraps__`` -- confirm.
class SparsePCA(SklProjector):
    __wraps__ = skl_decomposition.SparsePCA
    name = 'sparse pca'

    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
                 max_iter=1000, tol=1e-8, method='lars', n_jobs=1, U_init=None,
                 V_init=None, verbose=False, random_state=None, preprocessors=None):
        """Store the constructor arguments for the wrapped estimator."""
        super().__init__(preprocessors=preprocessors)
        # vars() with no argument is the local namespace, i.e. the arguments
        # above (plus ``self``). Do not introduce locals before this line.
        self.params = vars()

    def fit(self, X, Y=None):
        """Fit the wrapped estimator on ``X`` and wrap it in a PCAModel."""
        fitted = self.__wraps__(**self.params).fit(X, Y)
        return PCAModel(fitted, self.domain)
48
|
|
|
|
49
|
|
|
|
50
|
|
|
# Projector wrapping ``sklearn.decomposition.RandomizedPCA``.
# NOTE(review): scikit-learn deprecated RandomizedPCA in 0.18 and removed it
# in 0.20; with such a release the ``__wraps__`` lookup below fails when the
# module is imported -- confirm which scikit-learn versions are supported.
# NOTE(review): documented with comments rather than a class docstring so
# that the class ``__doc__`` stays unset, exactly as before.
class RandomizedPCA(SklProjector):
    __wraps__ = skl_decomposition.RandomizedPCA
    name = 'randomized pca'

    def __init__(self, n_components=None, copy=True, iterated_power=3,
                 whiten=False, random_state=None, preprocessors=None):
        """Store the constructor arguments for the wrapped estimator."""
        super().__init__(preprocessors=preprocessors)
        # vars() with no argument is the local namespace, i.e. the arguments
        # above (plus ``self``). Do not introduce locals before this line.
        self.params = vars()

    def fit(self, X, Y=None):
        """Fit the wrapped estimator on ``X`` and wrap it in a PCAModel."""
        fitted = self.__wraps__(**self.params).fit(X, Y)
        return PCAModel(fitted, self.domain)
63
|
|
|
|
64
|
|
|
|
65
|
|
|
class _LinearCombination:
    """Callable that renders a weighted sum of attributes as an expression.

    Calling an instance joins one term per attribute with ``' + '``:

    * without ``mean``: ``<weight> * <attr.to_sql()>``
    * with ``mean``:    ``<weight> * (<attr.to_sql()> - <mean>)``

    ``attrs``, ``weights`` and (when given) ``mean`` are walked in lockstep
    with ``zip``, so the shortest of them determines the number of terms.
    """

    def __init__(self, attrs, weights, mean=None):
        """Keep the attributes, their weights and the optional means."""
        self.attrs = attrs
        self.weights = weights
        self.mean = mean

    def __call__(self):
        """Return the expression string; empty when there are no terms."""
        if self.mean is None:
            return ' + '.join(
                '{} * {}'.format(w, a.to_sql())
                for a, w in zip(self.attrs, self.weights))
        # The template has exactly three placeholders; the stray fourth
        # argument that used to be passed here was silently ignored.
        return ' + '.join(
            '{} * ({} - {})'.format(w, a.to_sql(), m)
            for a, m, w in zip(self.attrs, self.mean, self.weights))
77
|
|
|
|
78
|
|
|
|
79
|
|
|
# Fitted PCA projection together with the output domain built from it.
# NOTE(review): ``components_`` and ``mean_`` are read from ``self``; they
# are presumably forwarded from the wrapped ``proj`` by Projection or
# WrapperMeta, which are not visible here -- confirm.
class PCAModel(Projection, metaclass=WrapperMeta):
    def __init__(self, proj, domain):
        """Wrap ``proj`` and build a domain with one variable per component.

        ``domain`` is kept as ``orig_domain``; its class variables and metas
        are carried over to the new ``self.domain``.
        """
        def component_variable(index):
            # Continuous variable named PC1, PC2, ... whose values are
            # computed by a Projector bound to this model and component.
            var = Orange.data.ContinuousVariable(
                'PC%d' % (index + 1), compute_value=Projector(self, index))
            # Expression built from this component's row of weights and,
            # when the model has one, the ``mean_`` attribute.
            var.to_sql = _LinearCombination(
                domain.attributes, self.components_[index, :],
                getattr(self, 'mean_', None))
            return var

        super().__init__(proj=proj)
        self.orig_domain = domain
        self.n_components = self.components_.shape[0]
        component_vars = [component_variable(k)
                          for k in range(self.n_components)]
        self.domain = Orange.data.Domain(
            component_vars, domain.class_vars, domain.metas)
95
|
|
|
|
96
|
|
|
|
97
|
|
|
# Projector wrapping ``sklearn.decomposition.IncrementalPCA``.
# NOTE(review): documented with comments rather than a class docstring so
# that the class ``__doc__`` stays unset, exactly as before; the metaclass
# behind SklProjector may derive it from ``__wraps__`` -- confirm.
class IncrementalPCA(SklProjector):
    __wraps__ = skl_decomposition.IncrementalPCA
    name = 'incremental pca'

    def __init__(self, n_components=None, whiten=False, copy=True,
                 batch_size=None, preprocessors=None):
        """Store the constructor arguments for the wrapped estimator."""
        super().__init__(preprocessors=preprocessors)
        # vars() with no argument is the local namespace, i.e. the arguments
        # above (plus ``self``). Do not introduce locals before this line.
        self.params = vars()

    def fit(self, X, Y=None):
        """Fit the wrapped estimator and wrap it in an IncrementalPCAModel."""
        fitted = self.__wraps__(**self.params).fit(X, Y)
        return IncrementalPCAModel(fitted, self.domain)

    def partial_fit(self, data):
        """Handle the first batch by calling this projector on ``data``."""
        return self(data)
113
|
|
|
|
114
|
|
|
|
115
|
|
|
# PCAModel whose wrapped estimator can be updated with further batches.
class IncrementalPCAModel(PCAModel):
    def partial_fit(self, data):
        """Update the wrapped estimator with one more batch; return self.

        ``data`` may be an ``Orange.data.Storage`` (converted to
        ``self.pre_domain`` when its domain differs, then its ``X`` is used)
        or any other object, which is handed to the estimator unchanged.
        """
        if isinstance(data, Orange.data.Storage):
            if data.domain != self.pre_domain:
                data = data.from_table(self.pre_domain, data)
            batch = data.X
        else:
            batch = data
        self.proj.partial_fit(batch)
        # Copy the estimator's refreshed attributes onto this model.
        self.__dict__.update(self.proj.__dict__)
        return self
125
|
|
|
|
126
|
|
|
|
127
|
|
|
class Projector:
    """Callable returning one column of a projection's transformed data."""

    def __init__(self, projection, feature):
        """Remember the projection and the index of the column to return."""
        self.projection = projection
        self.feature = feature
        self.transformed = None

    def __call__(self, data):
        """Transform ``data`` and return column ``self.feature``.

        ``data`` is first converted to the projection's ``pre_domain`` when
        its domain differs. The full transformed result is kept in
        ``self.transformed``.
        """
        if data.domain != self.projection.pre_domain:
            data = data.from_table(self.projection.pre_domain, data)
        result = self.projection.transform(data.X)
        self.transformed = result
        return result[:, self.feature]

    def __getstate__(self):
        """Return a copy of the instance state with ``transformed`` cleared."""
        return {**self.__dict__, 'transformed': None}
143
|
|
|
|
144
|
|
|
|
145
|
|
|
class RemotePCA:
    """Fit an incremental PCA on repeated samples of ``data``.

    Instantiating the class does not create a ``RemotePCA`` object:
    ``__new__`` runs the fitting loop and returns whatever ``partial_fit``
    last produced (or the unfitted ``IncrementalPCA`` projector when no
    non-empty sample was ever drawn).
    """

    def __new__(cls, data, batch=100, max_iter=100):
        """Run up to ``max_iter`` sampling rounds and return the model.

        Parameters:
            data: table providing ``approx_len``, ``sample_percentage``,
                ``download_data``, ``domain`` and ``X``.
                NOTE(review): presumably an SQL-backed table -- confirm.
            batch: desired number of rows per sample; converted into a
                percentage of ``data.approx_len()``.
            max_iter: maximum number of sampling rounds.

        After every fitted batch the model is tagged with ``iteration`` and
        handed to ``save_state``. The loop ends early when ``aborted()``
        returns true or when the sample drawn was ``data`` itself.
        """
        cont = Continuize(multinomial_treatment=Continuize.Remove)
        data = cont(data)
        model = Orange.projection.IncrementalPCA()
        percent = batch / data.approx_len() * 100
        for i in range(max_iter):
            data_sample = data.sample_percentage(percent, no_cache=True)
            if not data_sample:
                continue
            # Record this before building the in-memory copy below. The old
            # code rebound ``data_sample`` first, so its
            # ``data_sample is data`` test could never be true.
            # NOTE(review): presumably the sampler returns the table itself
            # when ``percent`` covers all of it -- confirm.
            sampled_everything = data_sample is data
            data_sample.download_data(1000000)
            # Keep only the attributes: a new table over the sample's X.
            batch_table = Orange.data.Table.from_numpy(
                Orange.data.Domain(data_sample.domain.attributes),
                data_sample.X)
            model = model.partial_fit(batch_table)
            model.iteration = i
            save_state(model)
            if aborted() or sampled_everything:
                break
        return model
165
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a Pylint configuration issue. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one such file in each sub-folder.