import numpy

from sklearn import linear_model
from sklearn.preprocessing import Imputer

import Orange.data
import Orange.classification
# from Orange.data.continuizer import DomainContinuizer


class SGDLearner(Orange.classification.SklLearner):

    def __init__(self, all_classes, means=None, stds=None):
        self.all_classes = all_classes
        # The SGD learner works significantly better with normalized data.
        # However, we cannot get the normalization from the data because we
        # do partial fitting, so it has to be provided.
        self.means = means
        self.stds = stds
        self.clf = linear_model.SGDClassifier(loss='log')
        self.clf.means = self.means
        self.clf.stds = self.stds

        # self.reset()

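    # A minimal usage sketch (illustration only, not part of the original
    # file): since the normalization statistics cannot be derived while
    # partial fitting, they could be estimated once from an initial batch
    # and passed to the constructor. The names below (first_batch, the
    # class list [0, 1]) are hypothetical.
    #
    #     X_init = first_batch.X
    #     learner = SGDLearner(all_classes=[0, 1],
    #                          means=X_init.mean(axis=0),
    #                          stds=X_init.std(axis=0))
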
    def partial_fit(self, X, Y, W, normalize=True):
        X = X.copy()
        if normalize:
            if self.means is not None:
                X -= self.means
            if self.stds is not None:
                X /= self.stds

        # Sometimes this exception occurs, but it is unclear when:
        """
        Exception in thread Thread-3:
        Traceback (most recent call last):
          File "/home/evis/anaconda3/lib/python3.4/threading.py", line 920, in _bootstrap_inner
            self.run()
          File "/home/evis/anaconda3/lib/python3.4/threading.py", line 868, in run
            self._target(*self._args, **self._kwargs)
          File "/home/evis/orange3/Orange/widgets/classify/owsgd.py", line 239, in training_thread
            self.train()
          File "/home/evis/orange3/Orange/widgets/classify/owsgd.py", line 261, in train
            classifier = self.learner.partial_fit(new_instances.X, new_instances.Y, None)
          File "/home/evis/orange3/Orange/classification/sgd.py", line 31, in partial_fit
            self.clf = self.clf.partial_fit(X, Y.reshape(-1), self.all_classes)
          File "/home/evis/anaconda3/lib/python3.4/site-packages/sklearn/linear_model/stochastic_gradient.py", line 526, in partial_fit
            coef_init=None, intercept_init=None)
          File "/home/evis/anaconda3/lib/python3.4/site-packages/sklearn/linear_model/stochastic_gradient.py", line 358, in _partial_fit
            X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
          File "/home/evis/anaconda3/lib/python3.4/site-packages/sklearn/utils/validation.py", line 450, in check_X_y
            _assert_all_finite(y)
          File "/home/evis/anaconda3/lib/python3.4/site-packages/sklearn/utils/validation.py", line 52, in _assert_all_finite
            " or a value too large for %r." % X.dtype)
        ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
        """

        # print("SGD training to %s" % Y.reshape(-1))

        self.clf = self.clf.partial_fit(X, Y.reshape(-1), self.all_classes)

        return SGDClassifier(self.clf)

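    # The ValueError quoted above is raised by scikit-learn's input
    # validation when X or Y contains NaN or infinite values. A hedged
    # sketch of a guard that could run before the partial_fit call
    # (illustration only; the original file does not do this, even though
    # Imputer is imported at the top):
    #
    #     if not (numpy.all(numpy.isfinite(X)) and numpy.all(numpy.isfinite(Y))):
    #         raise ValueError("partial_fit received NaN or infinite values")
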
    # TODO: Remove fit completely?
    fit = partial_fit

    # TODO: Is this reset function necessary?
    def reset(self):
        # 'log' or 'modified_huber' loss is required to predict probabilities.
        self.clf = linear_model.SGDClassifier(loss='log')
        self.clf.means = self.means
        self.clf.stds = self.stds

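    # Usage note (not something the original file relies on): because the
    # classifier is constructed with loss='log', the wrapped scikit-learn
    # SGDClassifier also exposes predict_proba, so class probabilities
    # could be read out with, for example, self.clf.predict_proba(Xa).
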
    def decision_function(self, X):
        if self.means is not None and self.stds is not None:
            Xa = (numpy.array(X) - numpy.array(self.means)) / numpy.array(self.stds)
        else:
            Xa = X

        value = self.clf.decision_function(Xa)
        return value

    def predict(self, X):
        if self.means is not None and self.stds is not None:
            Xa = (numpy.array(X) - numpy.array(self.means)) / numpy.array(self.stds)
        else:
            # The original code deliberately crashes here (1/0), presumably as
            # a debugging guard against predicting without normalization
            # statistics; the assignment below it is never reached.
            1/0
            Xa = X

        value = self.clf.predict(Xa)
        return value


class SGDClassifier(Orange.classification.SklModel):

    def __init__(self, clf):
        self.clf = clf

    def predict(self, X):
        if self.clf.means is not None and self.clf.stds is not None:
            Xa = (numpy.array(X) - numpy.array(self.clf.means)) / numpy.array(self.clf.stds)
        else:
            Xa = X

        value = self.clf.predict(Xa)
        return value
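
A minimal end-to-end sketch of how these classes could be used, assuming an Orange data table fed to the learner in batches; the dataset name, class list and batch size below are illustrative assumptions, not part of the file above:

    import numpy
    import Orange.data

    data = Orange.data.Table("iris")                # hypothetical example table
    all_classes = numpy.arange(len(data.domain.class_var.values))
    means, stds = data.X.mean(axis=0), data.X.std(axis=0)

    learner = SGDLearner(all_classes, means=means, stds=stds)
    for start in range(0, len(data), 50):           # feed the data in small batches
        batch = data[start:start + 50]
        model = learner.partial_fit(batch.X, batch.Y, None)

    print(model.predict(data.X[:5]))                # predictions from the wrapped model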
This can be caused by one of the following:

1. Missing dependencies. The error can indicate a configuration issue of Pylint: make sure the libraries imported here (numpy, scikit-learn, Orange) are installed and visible to it.
2. Missing __init__.py files. The error can also result from missing __init__.py files in your module folders; place one in each sub-folder so the folders are recognised as packages.
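
As an illustration of the second point, a package layout with the marker files in place might look like this (the folder and file names are hypothetical):

    myproject/
        __init__.py
        classification/
            __init__.py
            sgd.py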