1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
1 |
|
import numpy as np |
5
|
1 |
|
import numpy.linalg as al |
6
|
1 |
|
from scipy.stats import multivariate_normal as mvn |
7
|
1 |
|
import sklearn as sk |
8
|
1 |
|
from sklearn.svm import LinearSVC |
9
|
1 |
|
from sklearn.linear_model import LogisticRegression, LinearRegression |
10
|
1 |
|
from sklearn.model_selection import cross_val_predict |
11
|
1 |
|
from os.path import basename |
12
|
|
|
|
13
|
1 |
|
from .util import one_hot, regularize_matrix |
14
|
|
|
|
15
|
|
|
|
16
|
1 |
|
class TargetContrastivePessimisticClassifier(object): |
17
|
|
|
""" |
18
|
|
|
Classifiers based on Target Contrastive Pessimistic Risk minimization. |
19
|
|
|
|
20
|
|
|
Methods contain models, risk functions, parameter estimation, etc. |
21
|
|
|
""" |
22
|
|
|
|
23
|
1 |
|
def __init__(self, loss='lda', l2=1.0, max_iter=500, tolerance=1e-12, |
24
|
|
|
learning_rate=1.0, rate_decay='linear', verbosity=0): |
25
|
|
|
""" |
26
|
|
|
Select a particular type of TCPR classifier. |
27
|
|
|
|
28
|
|
|
Parameters |
29
|
|
|
---------- |
30
|
|
|
loss : str |
31
|
|
|
loss function for TCP Risk, options: 'ls', 'least_squares', 'lda', |
32
|
|
|
'linear_discriminant_analysis', 'qda', |
33
|
|
|
'quadratic_discriminant_analysis' (def: 'lda') |
34
|
|
|
l2 : float |
35
|
|
|
l2-regularization parameter value (def:0.01) |
36
|
|
|
max_iter : int |
37
|
|
|
maximum number of iterations for optimization (def: 500) |
38
|
|
|
tolerance : float |
39
|
|
|
convergence criterion on the TCP parameters (def: 1e-5) |
40
|
|
|
learning_rate : float |
41
|
|
|
parameter for size of update of gradient (def: 1.0) |
42
|
|
|
rate_decay : str |
43
|
|
|
type of learning rate decay, options: 'linear', 'quadratic', |
44
|
|
|
'geometric', 'exponential' (def: 'linear') |
45
|
|
|
|
46
|
|
|
Returns |
47
|
|
|
------- |
48
|
|
|
None |
49
|
|
|
|
50
|
|
|
""" |
51
|
|
|
# Classifier options |
52
|
1 |
|
self.loss = loss |
53
|
1 |
|
self.l2 = l2 |
54
|
|
|
|
55
|
|
|
# Optimization parameters |
56
|
1 |
|
self.max_iter = max_iter |
57
|
1 |
|
self.tolerance = tolerance |
58
|
1 |
|
self.learning_rate = learning_rate |
59
|
1 |
|
self.rate_decay = rate_decay |
60
|
1 |
|
self.verbosity = verbosity |
61
|
|
|
|
62
|
1 |
|
if self.loss in ['linear discriminant analysis', 'lda']: |
63
|
|
|
|
64
|
|
|
# Set to short name |
65
|
1 |
|
self.loss = 'lda' |
66
|
|
|
|
67
|
|
|
elif self.loss in ['quadratic discriminant analysis', 'qda']: |
68
|
|
|
|
69
|
|
|
# Set to short name |
70
|
|
|
self.loss = 'qda' |
71
|
|
|
|
72
|
|
|
else: |
73
|
|
|
# Other loss functions are not implemented |
74
|
|
|
raise ValueError('Model not implemented.') |
75
|
|
|
|
76
|
|
|
# Initialize classifier and classes parameters |
77
|
1 |
|
self.parameters = [] |
78
|
1 |
|
self.classes = [] |
79
|
|
|
|
80
|
|
|
# Whether model has been trained |
81
|
1 |
|
self.is_trained = False |
82
|
|
|
|
83
|
|
|
# Dimensionality of training data |
84
|
1 |
|
self.train_data_dim = 0 |
85
|
|
|
|
86
|
1 |
|
def add_intercept(self, X): |
87
|
|
|
"""Add 1's to data as last features.""" |
88
|
|
|
# Data shape |
89
|
|
|
N, D = X.shape |
90
|
|
|
|
91
|
|
|
# Check if there's not already an intercept column |
92
|
|
|
if np.any(np.sum(X, axis=0) == N): |
93
|
|
|
|
94
|
|
|
# Report |
95
|
|
|
print('Intercept is not the last feature. Swapping..') |
96
|
|
|
|
97
|
|
|
# Find which column contains the intercept |
98
|
|
|
intercept_index = np.argwhere(np.sum(X, axis=0) == N) |
99
|
|
|
|
100
|
|
|
# Swap intercept to last |
101
|
|
|
X = X[:, np.setdiff1d(np.arange(D), intercept_index)] |
102
|
|
|
|
103
|
|
|
# Add intercept as last column |
104
|
|
|
X = np.hstack((X, np.ones((N, 1)))) |
105
|
|
|
|
106
|
|
|
# Append column of 1's to data, and increment dimensionality |
107
|
|
|
return X, D+1 |
108
|
|
|
|
109
|
1 |
|
def remove_intercept(self, X): |
110
|
|
|
"""Remove 1's from data as last features.""" |
111
|
|
|
# Data shape |
112
|
1 |
|
N, D = X.shape |
113
|
|
|
|
114
|
|
|
# Find which column contains the intercept |
115
|
1 |
|
intercept_index = np.argwhere(np.sum(X, axis=0) == N) |
116
|
|
|
|
117
|
|
|
# Swap intercept to last |
118
|
1 |
|
X = X[:, np.setdiff1d(np.arange(D), intercept_index)] |
119
|
|
|
|
120
|
1 |
|
return X, D-1 |
121
|
|
|
|
122
|
1 |
|
def project_simplex(self, v, z=1.0): |
123
|
|
|
""" |
124
|
|
|
Project vector onto simplex using sorting. |
125
|
|
|
|
126
|
|
|
Reference: "Efficient Projections onto the L1-Ball for Learning in High |
127
|
|
|
Dimensions (Duchi, Shalev-Shwartz, Singer, Chandra, 2006)." |
128
|
|
|
|
129
|
|
|
Parameters |
130
|
|
|
---------- |
131
|
|
|
v : array |
132
|
|
|
vector to be projected (n dimensions by 0) |
133
|
|
|
z : float |
134
|
|
|
constant (def: 1.0) |
135
|
|
|
|
136
|
|
|
Returns |
137
|
|
|
------- |
138
|
|
|
w : array |
139
|
|
|
projected vector (n dimensions by 0) |
140
|
|
|
|
141
|
|
|
""" |
142
|
|
|
# Number of dimensions |
143
|
1 |
|
n = v.shape[0] |
144
|
|
|
|
145
|
|
|
# Sort vector |
146
|
1 |
|
mu = np.sort(v, axis=0)[::-1] |
147
|
|
|
|
148
|
|
|
# Find rho |
149
|
1 |
|
C = np.cumsum(mu) - z |
150
|
1 |
|
j = np.arange(n) + 1 |
151
|
1 |
|
rho = j[mu - C/j > 0][-1] |
152
|
|
|
|
153
|
|
|
# Define theta |
154
|
1 |
|
theta = C[mu - C/j > 0][-1] / float(rho) |
155
|
|
|
|
156
|
|
|
# Subtract theta from original vector and cap at 0 |
157
|
1 |
|
w = np.maximum(v - theta, 0) |
158
|
|
|
|
159
|
|
|
# Return projected vector |
160
|
1 |
|
return w |
161
|
|
|
|
162
|
1 |
|
def learning_rate_t(self, t): |
163
|
|
|
""" |
164
|
|
|
Compute current learning rate after decay. |
165
|
|
|
|
166
|
|
|
Parameters |
167
|
|
|
---------- |
168
|
|
|
t : int |
169
|
|
|
current iteration |
170
|
|
|
|
171
|
|
|
Returns |
172
|
|
|
------- |
173
|
|
|
alpha : float |
174
|
|
|
current learning rate |
175
|
|
|
|
176
|
|
|
""" |
177
|
|
|
# Select rate decay |
178
|
1 |
|
if self.rate_decay == 'linear': |
179
|
|
|
|
180
|
|
|
# Linear dropoff between t=0 and t=T |
181
|
1 |
|
alpha = (self.max_iter - t)/(self.learning_rate*self.max_iter) |
182
|
|
|
|
183
|
|
|
elif self.rate_decay == 'quadratic': |
184
|
|
|
|
185
|
|
|
# Quadratic dropoff between t=0 and t=T |
186
|
|
|
alpha = ((self.max_iter - t)/(self.learning_rate*self.max_iter))**2 |
187
|
|
|
|
188
|
|
|
elif self.rate_decay == 'geometric': |
189
|
|
|
|
190
|
|
|
# Drop rate off inversely to time |
191
|
|
|
alpha = 1 / (self.learning_rate * t) |
192
|
|
|
|
193
|
|
|
elif self.rate_decay == 'exponential': |
194
|
|
|
|
195
|
|
|
# Exponential dropoff |
196
|
|
|
alpha = np.exp(-self.learning_rate * t) |
197
|
|
|
|
198
|
|
|
else: |
199
|
|
|
raise ValueError('Rate decay type unknown.') |
200
|
|
|
|
201
|
1 |
|
return alpha |
202
|
|
|
|
203
|
1 |
|
def risk(self, Z, theta, q): |
204
|
|
|
""" |
205
|
|
|
Compute target contrastive pessimistic risk. |
206
|
|
|
|
207
|
|
|
Parameters |
208
|
|
|
---------- |
209
|
|
|
Z : array |
210
|
|
|
target samples (M samples by D features) |
211
|
|
|
theta : array |
212
|
|
|
classifier parameters (D features by K classes) |
213
|
|
|
q : array |
214
|
|
|
soft labels (M samples by K classes) |
215
|
|
|
|
216
|
|
|
Returns |
217
|
|
|
------- |
218
|
|
|
float |
219
|
|
|
Value of risk function. |
220
|
|
|
|
221
|
|
|
""" |
222
|
|
|
# Number of classes |
223
|
1 |
|
K = q.shape[1] |
224
|
|
|
|
225
|
|
|
# Compute negative log-likelihood |
226
|
1 |
|
L = self.neg_log_likelihood(Z, theta) |
227
|
|
|
|
228
|
|
|
# Weight loss by soft labels |
229
|
1 |
|
for k in range(K): |
230
|
1 |
|
L[:, k] *= q[:, k] |
231
|
|
|
|
232
|
|
|
# Sum over weighted losses |
233
|
1 |
|
L = np.sum(L, axis=1) |
234
|
|
|
|
235
|
|
|
# Risk is average loss |
236
|
1 |
|
return np.mean(L, axis=0) |
237
|
|
|
|
238
|
1 |
|
def neg_log_likelihood(self, X, theta): |
239
|
|
|
""" |
240
|
|
|
Compute negative log-likelihood under Gaussian distributions. |
241
|
|
|
|
242
|
|
|
Parameters |
243
|
|
|
---------- |
244
|
|
|
X : array |
245
|
|
|
data (N samples by D features) |
246
|
|
|
theta : tuple(array, array, array) |
247
|
|
|
tuple containing class proportions 'pi', class means 'mu', |
248
|
|
|
and class-covariances 'Si' |
249
|
|
|
|
250
|
|
|
Returns |
251
|
|
|
------- |
252
|
|
|
L : array |
253
|
|
|
loss (N samples by K classes) |
254
|
|
|
|
255
|
|
|
""" |
256
|
|
|
# Unpack parameters |
257
|
1 |
|
pi, mu, Si = theta |
258
|
|
|
|
259
|
|
|
# Check if parameter sets match |
260
|
1 |
|
if not pi.shape[1] == mu.shape[0]: |
261
|
|
|
raise ValueError('Number proportions does not match number means.') |
262
|
|
|
|
263
|
1 |
|
if not mu.shape[1] == Si.shape[0] == Si.shape[1]: |
264
|
|
|
raise ValueError('''Dimensions of mean does not match dimensions of |
265
|
|
|
covariance matrix.''') |
266
|
|
|
|
267
|
|
|
# Number of classes |
268
|
1 |
|
K = pi.shape[1] |
269
|
|
|
|
270
|
|
|
# Data shape |
271
|
1 |
|
N, D = X.shape |
272
|
|
|
|
273
|
|
|
# Preallocate loss array |
274
|
1 |
|
L = np.zeros((N, K)) |
275
|
|
|
|
276
|
1 |
|
for k in range(K): |
277
|
|
|
|
278
|
|
|
# Check for linear or quadratic |
279
|
1 |
|
if self.loss == 'lda': |
280
|
|
|
|
281
|
1 |
|
try: |
282
|
|
|
# Probability under k-th Gaussian with shared covariance |
283
|
1 |
|
probs = mvn.pdf(X, mu[k, :], Si) |
284
|
|
|
|
285
|
|
|
except al.LinAlgError as err: |
286
|
|
|
print('Covariance matrix is singular. Add regularization.') |
287
|
|
|
raise err |
288
|
|
|
|
289
|
|
|
elif self.loss == 'qda': |
290
|
|
|
|
291
|
|
|
try: |
292
|
|
|
# Probability under k-th Gaussian with own covariance |
293
|
|
|
probs = mvn.pdf(X, mu[k, :], Si[:, :, k]) |
294
|
|
|
|
295
|
|
|
except al.LinAlgError as err: |
296
|
|
|
print('Covariance matrix is singular. Add regularization.') |
297
|
|
|
raise err |
298
|
|
|
|
299
|
|
|
else: |
300
|
|
|
raise ValueError('Loss unknown.') |
301
|
|
|
|
302
|
|
|
# Negative log-likelihood |
303
|
1 |
|
L[:, k] = -np.log(pi[0, k]) - np.log(probs) |
304
|
|
|
|
305
|
1 |
|
return L |
306
|
|
|
|
307
|
1 |
|
def discriminant_parameters(self, X, Y): |
308
|
|
|
""" |
309
|
|
|
Estimate parameters of Gaussian distribution for discriminant analysis. |
310
|
|
|
|
311
|
|
|
Parameters |
312
|
|
|
---------- |
313
|
|
|
X : array |
314
|
|
|
data array (N samples by D features) |
315
|
|
|
Y : array |
316
|
|
|
label array (N samples by K classes) |
317
|
|
|
|
318
|
|
|
Returns |
319
|
|
|
------- |
320
|
|
|
pi : array |
321
|
|
|
class proportions (1 by K classes) |
322
|
|
|
mu : array |
323
|
|
|
class means (K classes by D features) |
324
|
|
|
Si : array |
325
|
|
|
class covariances (D features D features by K classes) |
326
|
|
|
|
327
|
|
|
""" |
328
|
|
|
# Check labels |
329
|
1 |
|
K = Y.shape[1] |
330
|
|
|
|
331
|
|
|
# Check for sufficient labels |
332
|
1 |
|
if not K > 1: |
333
|
|
|
raise ValueError('Number of classes should be larger than 1.') |
334
|
|
|
|
335
|
|
|
# Data shape |
336
|
1 |
|
N, D = X.shape |
337
|
|
|
|
338
|
|
|
# Preallocate parameter arrays |
339
|
1 |
|
pi = np.zeros((1, K)) |
340
|
1 |
|
mu = np.zeros((K, D)) |
341
|
1 |
|
Si = np.zeros((D, D, K)) |
342
|
|
|
|
343
|
|
|
# For each class |
344
|
1 |
|
for k in range(K): |
345
|
|
|
|
346
|
|
|
# Number of samples for current class |
347
|
1 |
|
Nk = np.sum(Y[:, k], axis=0) |
348
|
|
|
|
349
|
|
|
# Check for no samples assigned to certain class |
350
|
1 |
|
if Nk == 0: |
351
|
|
|
|
352
|
|
|
# Proportion of samples for current class |
353
|
|
|
pi[0, k] = 0 |
354
|
|
|
|
355
|
|
|
# Mean of current class |
356
|
|
|
mu[k, :] = np.zeros((1, D)) |
357
|
|
|
|
358
|
|
|
# Covariance of current class |
359
|
|
|
Si[:, :, k] = np.eye(D, D) |
360
|
|
|
|
361
|
|
|
else: |
362
|
|
|
|
363
|
|
|
# Proportion of samples for current class |
364
|
1 |
|
pi[0, k] = Nk / N |
365
|
|
|
|
366
|
|
|
# Mean of current class |
367
|
1 |
|
mu[k, :] = np.dot(Y[:, k].T, X) / Nk |
368
|
|
|
|
369
|
|
|
# Subtract mean from data |
370
|
1 |
|
X_ = X - mu[k, :] |
371
|
|
|
|
372
|
|
|
# Diagonalize current label vector |
373
|
1 |
|
dYk = np.diag(Y[:, k]) |
374
|
|
|
|
375
|
|
|
# Covariance of current class |
376
|
1 |
|
Si[:, :, k] = np.dot(np.dot(X_.T, dYk), X_) / Nk |
377
|
|
|
|
378
|
|
|
# Regularization |
379
|
1 |
|
Si[:, :, k] = regularize_matrix(Si[:, :, k], a=self.l2) |
380
|
|
|
|
381
|
|
|
# Check for linear or quadratic discriminant analysis |
382
|
1 |
|
if self.loss == 'lda': |
383
|
|
|
|
384
|
|
|
# In LDA, the class-covariance matrices are combined |
385
|
1 |
|
Si = self.combine_class_covariances(Si, pi) |
386
|
|
|
|
387
|
|
|
# Check for numerical problems |
388
|
1 |
|
if np.any(np.isnan(mu)) or np.any(np.isnan(Si)): |
389
|
|
|
raise RuntimeError('Parameter estimate is NaN.') |
390
|
|
|
|
391
|
1 |
|
return pi, mu, Si |
392
|
|
|
|
393
|
1 |
|
def combine_class_covariances(self, Si, pi): |
394
|
|
|
""" |
395
|
|
|
Linear combination of class covariance matrices. |
396
|
|
|
|
397
|
|
|
Parameters |
398
|
|
|
---------- |
399
|
|
|
Si : array |
400
|
|
|
Covariance matrix (D features by D features by K classes) |
401
|
|
|
pi : array |
402
|
|
|
class proportions (1 by K classes) |
403
|
|
|
|
404
|
|
|
Returns |
405
|
|
|
------- |
406
|
|
|
Si : array |
407
|
|
|
Combined covariance matrix (D by D) |
408
|
|
|
|
409
|
|
|
""" |
410
|
|
|
# Number of classes |
411
|
1 |
|
K = Si.shape[2] |
412
|
|
|
|
413
|
|
|
# Check if w is size K |
414
|
1 |
|
if not pi.shape[1] == K: |
415
|
|
|
raise ValueError('''Number of proportions does not match with number |
416
|
|
|
classes of covariance matrix.''') |
417
|
|
|
|
418
|
|
|
# For each class |
419
|
1 |
|
for k in range(K): |
420
|
|
|
|
421
|
|
|
# Weight each class-covariance |
422
|
1 |
|
Si[:, :, k] = Si[:, :, k] * pi[0, k] |
423
|
|
|
|
424
|
|
|
# Sum over weighted class-covariances |
425
|
1 |
|
return np.sum(Si, axis=2) |
426
|
|
|
|
427
|
1 |
|
def tcpr_da(self, X, y, Z): |
428
|
|
|
""" |
429
|
|
|
Target Contrastive Pessimistic Risk - discriminant analysis. |
430
|
|
|
|
431
|
|
|
Parameters |
432
|
|
|
---------- |
433
|
|
|
X : array |
434
|
|
|
source data (N samples by D features) |
435
|
|
|
y : array |
436
|
|
|
source labels (N samples by 1) |
437
|
|
|
Z : array |
438
|
|
|
target data (M samples by D features) |
439
|
|
|
|
440
|
|
|
Returns |
441
|
|
|
------- |
442
|
|
|
theta : array |
443
|
|
|
classifier parameters (D features by K classes) |
444
|
|
|
|
445
|
|
|
""" |
446
|
|
|
# Data shapes |
447
|
1 |
|
N, DX = X.shape |
448
|
1 |
|
M, DZ = Z.shape |
449
|
|
|
|
450
|
|
|
# Assert equivalent dimensionalities |
451
|
1 |
|
if not DX == DZ: |
452
|
|
|
raise ValueError('Dimensionalities of X and Z should be equal.') |
453
|
|
|
|
454
|
|
|
# Augment data with bias if necessary |
455
|
1 |
|
X, DX = self.remove_intercept(X) |
456
|
1 |
|
Z, DZ = self.remove_intercept(Z) |
457
|
|
|
|
458
|
|
|
# Map labels to one-hot-encoding |
459
|
1 |
|
Y, classes = one_hot(y) |
460
|
|
|
|
461
|
|
|
# Number of classes |
462
|
1 |
|
K = len(classes) |
463
|
|
|
|
464
|
|
|
# Check for at least 2 classes |
465
|
1 |
|
if not K > 1: |
466
|
|
|
raise ValueError('Number of classes should be larger than 1.') |
467
|
|
|
|
468
|
|
|
# Estimate parameters of source model |
469
|
1 |
|
theta_ref = self.discriminant_parameters(X, Y) |
470
|
|
|
|
471
|
|
|
# Loss is negative log-likelihood under reference parameters |
472
|
1 |
|
L_ref = self.neg_log_likelihood(Z, theta_ref) |
473
|
|
|
|
474
|
|
|
# Initialize target posterior |
475
|
1 |
|
q = np.ones((M, K)) / K |
476
|
|
|
|
477
|
1 |
|
print('Starting TCP optimization') |
478
|
|
|
|
479
|
1 |
|
TCPRt = np.inf |
480
|
1 |
|
for t in range(self.max_iter): |
481
|
|
|
|
482
|
|
|
# Maximization phase |
483
|
|
|
|
484
|
|
|
# Estimate parameters using TCP risk |
485
|
1 |
|
theta_tcp = self.discriminant_parameters(Z, q) |
486
|
|
|
|
487
|
|
|
# Minimization phase |
488
|
|
|
|
489
|
|
|
# Compute loss under new parameters |
490
|
1 |
|
L_tcp = self.neg_log_likelihood(Z, theta_tcp) |
491
|
|
|
|
492
|
|
|
# Gradient is difference in losses |
493
|
1 |
|
Dq = L_tcp - L_ref |
494
|
|
|
|
495
|
|
|
# Update learning rate |
496
|
1 |
|
alpha = self.learning_rate_t(t) |
497
|
|
|
|
498
|
|
|
# Steepest descent step |
499
|
1 |
|
q -= alpha*Dq |
500
|
|
|
|
501
|
|
|
# Project back onto simplex |
502
|
1 |
|
for m in range(M): |
503
|
1 |
|
q[m, :] = self.project_simplex(q[m, :]) |
504
|
|
|
|
505
|
|
|
# Monitor progress |
506
|
|
|
|
507
|
|
|
# Risks of current parameters |
508
|
1 |
|
R_tcp = self.risk(Z, theta_tcp, q) |
509
|
1 |
|
R_ref = self.risk(Z, theta_ref, q) |
510
|
|
|
|
511
|
|
|
# Assert no numerical problems |
512
|
1 |
|
if np.isnan(R_tcp) or np.isnan(R_ref): |
513
|
|
|
raise RuntimeError('Objective is NaN.') |
514
|
|
|
|
515
|
|
|
# Current TCP risk |
516
|
1 |
|
TCPRt_ = R_tcp - R_ref |
517
|
|
|
|
518
|
|
|
# Change in risk difference for this iteration |
519
|
1 |
|
dR = al.norm(TCPRt - TCPRt_) |
520
|
|
|
|
521
|
|
|
# Check for change smaller than tolerance |
522
|
1 |
|
if (dR < self.tolerance): |
523
|
1 |
|
print('Broke at iteration '+str(t)+', TCP Risk = '+str(TCPRt_)) |
524
|
1 |
|
break |
525
|
|
|
|
526
|
|
|
# Report progress |
527
|
1 |
|
if (t % 100) == 1: |
528
|
1 |
|
print('Iteration ' + str(t) + '/' + str(self.max_iter) + |
529
|
|
|
', TCP Risk = ' + str(TCPRt_)) |
530
|
|
|
|
531
|
|
|
# Update |
532
|
1 |
|
TCPRt = TCPRt_ |
533
|
|
|
|
534
|
|
|
# Return estimated parameters |
535
|
1 |
|
return theta_tcp |
536
|
|
|
|
537
|
1 |
|
def fit(self, X, y, Z): |
538
|
|
|
""" |
539
|
|
|
Fit/train an importance-weighted classifier. |
540
|
|
|
|
541
|
|
|
Parameters |
542
|
|
|
---------- |
543
|
|
|
X : array |
544
|
|
|
source data (N samples by D features) |
545
|
|
|
y : array |
546
|
|
|
source labels (N samples by 1) |
547
|
|
|
Z : array |
548
|
|
|
target data (M samples by D features) |
549
|
|
|
|
550
|
|
|
Returns |
551
|
|
|
------- |
552
|
|
|
None |
553
|
|
|
|
554
|
|
|
""" |
555
|
|
|
# Data shapes |
556
|
1 |
|
N, DX = X.shape |
557
|
1 |
|
M, DZ = Z.shape |
558
|
|
|
|
559
|
|
|
# Assert equivalent dimensionalities |
560
|
1 |
|
if not DX == DZ: |
561
|
|
|
raise ValueError('Dimensionalities of X and Z should be equal.') |
562
|
|
|
|
563
|
1 |
|
if self.loss in ['lda', 'qda']: |
564
|
|
|
|
565
|
|
|
# Discriminant analysis model for TCPR |
566
|
1 |
|
self.parameters = self.tcpr_da(X, y, Z) |
567
|
|
|
|
568
|
|
|
else: |
569
|
|
|
# Other loss functions are not implemented |
570
|
|
|
raise ValueError('Loss function unknown.') |
571
|
|
|
|
572
|
|
|
# Extract and store classes |
573
|
1 |
|
self.classes = np.unique(y) |
574
|
|
|
|
575
|
|
|
# Mark classifier as trained |
576
|
1 |
|
self.is_trained = True |
577
|
|
|
|
578
|
|
|
# Store training data dimensionality |
579
|
1 |
|
self.train_data_dim = DX |
580
|
|
|
|
581
|
1 |
|
def predict(self, Z_): |
582
|
|
|
""" |
583
|
|
|
Make predictions on new dataset. |
584
|
|
|
|
585
|
|
|
Parameters |
586
|
|
|
---------- |
587
|
|
|
Z : array |
588
|
|
|
new data set (M samples by D features) |
589
|
|
|
|
590
|
|
|
Returns |
591
|
|
|
------- |
592
|
|
|
preds : array |
593
|
|
|
label predictions (M samples by 1) |
594
|
|
|
|
595
|
|
|
""" |
596
|
|
|
# Data shape |
597
|
1 |
|
M, D = Z_.shape |
598
|
|
|
|
599
|
|
|
# If classifier is trained, check for same dimensionality |
600
|
1 |
|
if self.is_trained: |
601
|
1 |
|
if not self.train_data_dim == D: |
602
|
|
|
raise ValueError('''Test data is of different dimensionality |
603
|
|
|
than training data.''') |
604
|
|
|
|
605
|
1 |
|
if self.loss in ['lda', 'qda']: |
606
|
|
|
|
607
|
|
|
# Compute probabilities under each distribution |
608
|
1 |
|
probs = self.neg_log_likelihood(Z_, self.parameters) |
609
|
|
|
|
610
|
|
|
# Take largest probability as predictions |
611
|
1 |
|
preds = np.argmax(probs, axis=1) |
612
|
|
|
|
613
|
|
|
# Map predictions back to original labels |
614
|
1 |
|
preds = self.classes[preds] |
615
|
|
|
|
616
|
|
|
# Return predictions array |
617
|
1 |
|
return preds |
618
|
|
|
|
619
|
1 |
|
def get_params(self): |
620
|
|
|
"""Return classifier parameters.""" |
621
|
|
|
# Check if classifier is trained |
622
|
|
|
if self.is_trained: |
623
|
|
|
return self.parameters |
624
|
|
|
|
625
|
|
|
else: |
626
|
|
|
# Throw soft error |
627
|
|
|
print('Classifier is not trained yet.') |
628
|
|
|
return [] |
629
|
|
|
|
630
|
1 |
|
def error_rate(self, preds, u_): |
631
|
|
|
"""Compute classification error rate.""" |
632
|
|
|
return np.mean(preds != u_, axis=0) |
633
|
|
|
|