#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import numpy.linalg as al
from scipy.stats import multivariate_normal as mvn
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import one_hot, regularize_matrix


class TargetContrastivePessimisticClassifier(object):
    """
    Classifiers based on Target Contrastive Pessimistic Risk minimization.

    Methods contain models, risk functions, parameter estimation, etc.
    """

    def __init__(self, loss='lda', l2=1.0, max_iter=500, tolerance=1e-12,
                 learning_rate=1.0, rate_decay='linear', verbosity=0):
        """
        Select a particular type of TCPR classifier.

        INPUT   (1) str 'loss': loss function for TCP Risk, options: 'lda',
                    'linear discriminant analysis', 'qda',
                    'quadratic discriminant analysis' (def: 'lda')
                (2) float 'l2': l2-regularization parameter value (def: 1.0)
                (3) int 'max_iter': maximum number of iterations for
                    optimization (def: 500)
                (4) float 'tolerance': convergence criterion on the TCP
                    parameters (def: 1e-12)
                (5) float 'learning_rate': parameter for size of update of
                    gradient (def: 1.0)
                (6) str 'rate_decay': type of learning rate decay, options:
                    'linear', 'quadratic', 'geometric', 'exponential'
                    (def: 'linear')
                (7) int 'verbosity': verbosity level (def: 0)
        """
        # Classifier options
        self.loss = loss
        self.l2 = l2

        # Optimization parameters
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.learning_rate = learning_rate
        self.rate_decay = rate_decay
        self.verbosity = verbosity

        if self.loss in ['linear discriminant analysis', 'lda']:

            # Set to short name
            self.loss = 'lda'

        elif self.loss in ['quadratic discriminant analysis', 'qda']:

            # Set to short name
            self.loss = 'qda'

        else:
            # Other loss functions are not implemented
            raise ValueError('Model not implemented.')

        # Initialize classifier and classes parameters
        self.parameters = []
        self.classes = []

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = 0

    def add_intercept(self, X):
        """Add 1's to data as last features."""
        # Data shape
        N, D = X.shape

        # Check if there's not already an intercept column
        if np.any(np.sum(X, axis=0) == N):

            # Report
            print('Intercept is not the last feature. Swapping..')

            # Find which column contains the intercept
            intercept_index = np.argwhere(np.sum(X, axis=0) == N)

            # Remove the existing intercept column
            X = X[:, np.setdiff1d(np.arange(D), intercept_index)]

        # Add intercept as last column
        X = np.hstack((X, np.ones((N, 1))))

        # Return augmented data and its new dimensionality
        return X, X.shape[1]

    def remove_intercept(self, X):
        """Remove 1's from data as last features."""
        # Data shape
        N, D = X.shape

        # Find which column contains the intercept
        intercept_index = np.argwhere(np.sum(X, axis=0) == N)

        # Remove intercept columns, if any
        X = X[:, np.setdiff1d(np.arange(D), intercept_index)]

        # Return reduced data and its new dimensionality
        return X, X.shape[1]

    def project_simplex(self, v, z=1.0):
        """
        Project vector onto simplex using sorting.

        Reference: "Efficient Projections onto the L1-Ball for Learning in
        High Dimensions" (Duchi, Shalev-Shwartz, Singer & Chandra, 2008).

        INPUT   (1) array 'v': vector to be projected (n-dimensional)
                (2) float 'z': constant (def: 1.0)
        OUTPUT  (1) array 'w': projected vector (n-dimensional)
        """
        # Number of dimensions
        n = v.shape[0]

        # Sort vector in descending order
        mu = np.sort(v, axis=0)[::-1]

        # Find rho
        C = np.cumsum(mu) - z
        j = np.arange(n) + 1
        rho = j[mu - C/j > 0][-1]

        # Define theta
        theta = C[mu - C/j > 0][-1] / float(rho)

        # Subtract theta from original vector and cap at 0
        w = np.maximum(v - theta, 0)

        # Return projected vector
        return w
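
    # Worked example (illustrative sketch, not part of the original code):
    # projecting v = [0.5, 0.8, -0.1] onto the simplex with z=1.0 gives
    # theta = 0.15, so w = max(v - 0.15, 0) = [0.35, 0.65, 0.0]; the result
    # is non-negative and sums to z, as required for the soft labels q.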

    def learning_rate_t(self, t):
        """Compute current learning rate after decay."""
        # Select rate decay
        if self.rate_decay == 'linear':

            # Linear dropoff between t=0 and t=T
            alpha = (self.max_iter - t)/(self.learning_rate*self.max_iter)

        elif self.rate_decay == 'quadratic':

            # Quadratic dropoff between t=0 and t=T
            alpha = ((self.max_iter - t)/(self.learning_rate*self.max_iter))**2

        elif self.rate_decay == 'geometric':

            # Drop rate off inversely to time (t+1 avoids division by zero,
            # as iterations are counted from t=0)
            alpha = 1 / (self.learning_rate * (t + 1))

        elif self.rate_decay == 'exponential':

            # Exponential dropoff
            alpha = np.exp(-self.learning_rate * t)

        else:
            raise ValueError('Rate decay type unknown.')

        return alpha

    def risk(self, Z, theta, q):
        """
        Compute target contrastive pessimistic risk.

        INPUT   (1) array 'Z': target samples (M samples by D features)
                (2) tuple 'theta': classifier parameters (pi, mu, Si)
                (3) array 'q': soft labels (M samples by K classes)
        OUTPUT  (1) float: risk
        """
        # Number of classes
        K = q.shape[1]

        # Compute negative log-likelihood
        L = self.neg_log_likelihood(Z, theta)

        # Weight loss by soft labels
        for k in range(K):
            L[:, k] *= q[:, k]

        # Sum over weighted losses
        L = np.sum(L, axis=1)

        # Risk is average loss
        return np.mean(L, axis=0)
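
    # In symbols (restating what risk() computes): given target samples
    # z_1, ..., z_M, soft labels q (M x K) and parameters theta,
    #
    #     R(theta, q) = (1/M) * sum_i sum_k q_ik * L_k(z_i; theta),
    #
    # where L_k(z_i; theta) is the negative log-likelihood of z_i under
    # class k, as returned by neg_log_likelihood() below.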

    def neg_log_likelihood(self, X, theta):
        """
        Compute negative log-likelihood under Gaussian distributions.

        INPUT   (1) array 'X': data (N samples by D features)
                (2) tuple 'theta': containing class proportions 'pi', class
                    means 'mu', and class-covariances 'Si'
        OUTPUT  (1) array 'L': loss (N samples by K classes)
        """
        # Unpack parameters
        pi, mu, Si = theta

        # Check if parameter sets match
        assert pi.shape[1] == mu.shape[0]
        assert mu.shape[1] == Si.shape[0]
        assert Si.shape[0] == Si.shape[1]

        # Number of classes
        K = pi.shape[1]

        # Data shape
        N, D = X.shape

        # Preallocate loss array
        L = np.zeros((N, K))

        for k in range(K):

            # Check for linear or quadratic
            if self.loss == 'lda':

                try:
                    # Probability under k-th Gaussian with shared covariance
                    probs = mvn.pdf(X, mu[k, :], Si)

                except al.LinAlgError as err:
                    print('Covariance matrix is singular. Add regularization.')
                    raise err

            elif self.loss == 'qda':

                try:
                    # Probability under k-th Gaussian with own covariance
                    probs = mvn.pdf(X, mu[k, :], Si[:, :, k])

                except al.LinAlgError as err:
                    print('Covariance matrix is singular. Add regularization.')
                    raise err

            else:
                raise ValueError('Loss unknown.')

            # Negative log-likelihood
            L[:, k] = -np.log(pi[0, k]) - np.log(probs)

        return L

    def discriminant_parameters(self, X, Y):
        """
        Estimate parameters of Gaussian distribution for discriminant analysis.

        INPUT   (1) array 'X': data array (N samples by D features)
                (2) array 'Y': label array (N samples by K classes)
        OUTPUT  (1) array 'pi': class proportions (1 by K classes)
                (2) array 'mu': class means (K classes by D features)
                (3) array 'Si': class covariances (D features by D features
                    by K classes)
        """
        # Check labels
        K = Y.shape[1]
        assert K > 1

        # Data shape
        N, D = X.shape

        # Preallocate parameter arrays
        pi = np.zeros((1, K))
        mu = np.zeros((K, D))
        Si = np.zeros((D, D, K))

        # For each class
        for k in range(K):

            # Number of samples for current class
            Nk = np.sum(Y[:, k], axis=0)

            # Check for no samples assigned to certain class
            if Nk == 0:

                # Proportion of samples for current class
                pi[0, k] = 0

                # Mean of current class
                mu[k, :] = np.zeros((1, D))

                # Covariance of current class
                Si[:, :, k] = np.eye(D, D)

            else:

                # Proportion of samples for current class
                pi[0, k] = Nk / N

                # Mean of current class
                mu[k, :] = np.dot(Y[:, k].T, X) / Nk

                # Subtract mean from data
                X_ = X - mu[k, :]

                # Diagonalize current label vector
                dYk = np.diag(Y[:, k])

                # Covariance of current class
                Si[:, :, k] = np.dot(np.dot(X_.T, dYk), X_) / Nk

                # Regularization
                Si[:, :, k] = regularize_matrix(Si[:, :, k], a=self.l2)

        # Check for linear or quadratic discriminant analysis
        if self.loss == 'lda':

            # In LDA, the class-covariance matrices are combined
            Si = self.combine_class_covariances(Si, pi)

        # Check for numerical problems
        if np.any(np.isnan(mu)) or np.any(np.isnan(Si)):
            raise RuntimeError('Parameter estimate is NaN.')

        return pi, mu, Si

    def combine_class_covariances(self, Si, pi):
        """
        Linear combination of class covariance matrices.

        INPUT   (1) array 'Si': Covariance matrix (D features by D features
                    by K classes)
                (2) array 'pi': class proportions (1 by K classes)
        OUTPUT  (1) array 'Si': Combined covariance matrix (D by D)
        """
        # Number of classes
        K = Si.shape[2]

        # Check if pi contains K classes
        assert pi.shape[1] == K

        # For each class
        for k in range(K):

            # Weight each class-covariance
            Si[:, :, k] = Si[:, :, k] * pi[0, k]

        # Sum over weighted class-covariances
        return np.sum(Si, axis=2)
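
    # Note (restating combine_class_covariances() above): for LDA the shared
    # covariance matrix is the class-proportion-weighted sum of per-class
    # covariances, Si = sum_k pi_k * Si_k; the weighting is applied in place
    # on the input array Si.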

    def tcpr_da(self, X, y, Z):
        """
        Target Contrastive Pessimistic Risk - discriminant analysis.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  (1) tuple 'theta': estimated classifier parameters
                    (pi, mu, Si)
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Remove intercept columns, if present
        X, DX = self.remove_intercept(X)
        Z, DZ = self.remove_intercept(Z)

        # Map labels to one-hot-encoding
        Y, classes = one_hot(y)

        # Number of classes
        K = len(classes)

        # Check for at least 2 classes
        assert K > 1

        # Estimate parameters of source model
        theta_ref = self.discriminant_parameters(X, Y)

        # Loss is negative log-likelihood under reference parameters
        L_ref = self.neg_log_likelihood(Z, theta_ref)

        # Initialize target posterior
        q = np.ones((M, K)) / K

        print('Starting TCP optimization')

        TCPRt = np.inf
        for t in range(self.max_iter):

            # Maximization phase

            # Estimate parameters using TCP risk
            theta_tcp = self.discriminant_parameters(Z, q)

            # Minimization phase

            # Compute loss under new parameters
            L_tcp = self.neg_log_likelihood(Z, theta_tcp)

            # Gradient is difference in losses
            Dq = L_tcp - L_ref

            # Update learning rate
            alpha = self.learning_rate_t(t)

            # Steepest descent step
            q -= alpha*Dq

            # Project back onto simplex
            for m in range(M):
                q[m, :] = self.project_simplex(q[m, :])

            # Monitor progress

            # Risks of current parameters
            R_tcp = self.risk(Z, theta_tcp, q)
            R_ref = self.risk(Z, theta_ref, q)

            # Assert no numerical problems
            if np.isnan(R_tcp) or np.isnan(R_ref):
                raise RuntimeError('Objective is NaN.')

            # Current TCP risk
            TCPRt_ = R_tcp - R_ref

            # Change in risk difference for this iteration
            dR = al.norm(TCPRt - TCPRt_)

            # Check for change smaller than tolerance
            if (dR < self.tolerance):
                print('Broke at iteration '+str(t)+', TCP Risk = '+str(TCPRt_))
                break

            # Report progress
            if (t % 100) == 1:
                print('Iteration ' + str(t) + '/' + str(self.max_iter) +
                      ', TCP Risk = ' + str(TCPRt_))

            # Update
            TCPRt = TCPRt_

        # Return estimated parameters
        return theta_tcp

    def fit(self, X, y, Z):
        """
        Fit/train a target contrastive pessimistic classifier.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        if self.loss in ['lda', 'qda']:

            # Discriminant analysis model for TCPR
            self.parameters = self.tcpr_da(X, y, Z)

        else:
            # Other loss functions are not implemented
            raise ValueError('Loss function unknown.')

        # Extract and store classes
        self.classes = np.unique(y)

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        """
        # Data shape
        M, D = Z_.shape

        # If classifier is trained, check for same dimensionality
        if self.is_trained:
            assert D == self.train_data_dim

        if self.loss in ['lda', 'qda']:

            # Compute negative log-likelihood under each class distribution
            L = self.neg_log_likelihood(Z_, self.parameters)

            # Take the class with the smallest loss (highest likelihood)
            preds = np.argmin(L, axis=1)

        # Map predictions back to original labels
        preds = self.classes[preds]

        # Return predictions array
        return preds

    def get_params(self):
        """Return classifier parameters."""
        # Check if classifier is trained
        if self.is_trained:
            return self.parameters

        else:
            # Throw soft error
            print('Classifier is not trained yet.')
            return []

    def error_rate(self, preds, u_):
        """Compute classification error rate."""
        return np.mean(preds != u_, axis=0)
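

# Example usage (a minimal sketch with synthetic data; variable names and
# sizes are illustrative assumptions, not part of the original module). With
# a labeled source set (X, y) and an unlabeled target set Z of the same
# dimensionality, adaptation and prediction look like:
#
#     import numpy as np
#
#     X = np.vstack((np.random.randn(50, 2) - 1, np.random.randn(50, 2) + 1))
#     y = np.concatenate((np.zeros(50), np.ones(50)))   # source labels
#     Z = np.random.randn(40, 2) + 0.5                  # unlabeled target set
#
#     clf = TargetContrastivePessimisticClassifier(loss='lda', l2=0.1)
#     clf.fit(X, y, Z)
#     preds = clf.predict(Z)
#
# If true target labels 'u' happen to be available, the error rate is
# clf.error_rate(preds, u).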