Completed
Push — master ( f3d068...88fa67 )
by Wouter
03:44
created

FeatureLevelDomainAdaptiveClassifier.__init__()   A

Complexity

Conditions 1

Size

Total Lines 47

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
c 2
b 0
f 0
dl 0
loc 47
ccs 10
cts 10
cp 1
crap 1
rs 9.0303
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4 1
import numpy as np
5 1
import scipy.stats as st
6 1
from scipy.optimize import minimize
7 1
from scipy.sparse.linalg import eigs
8 1
from scipy.spatial.distance import cdist
9 1
import sklearn as sk
10 1
from sklearn.svm import LinearSVC
11 1
from sklearn.linear_model import LogisticRegression, LinearRegression
12 1
from sklearn.model_selection import cross_val_predict
13 1
from os.path import basename
14
15 1
from .util import is_pos_def, one_hot
16
17
18 1
class FeatureLevelDomainAdaptiveClassifier(object):
19
    """
20
    Class of feature-level domain-adaptive classifiers.
21
22
    Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
23
    domain adaptation. JMLR.
24
25
    Methods contain training and prediction functions.
26
    """
27
28 1
    def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
29
                 max_iter=100, tolerance=1e-5, verbose=True):
30
        """
31
        Set classifier instance parameters.
32
33
        Parameters
34
        ----------
35
        l2 : float
36
            l2-regularization parameter value (def:0.01)
37
        loss : str
38
            loss function for classifier, options are 'logistic' or 'quadratic'
39
            (def: 'logistic')
40
        transfer_model : str
41
            distribution to use for transfer model, options are 'dropout' and
42
            'blankout' (def: 'blankout')
43
        max_iter : int
44
            maximum number of iterations (def: 100)
45
        tolerance : float
46
            convergence criterion threshold on x (def: 1e-5)
47
        verbose : bool
48
            report training progress (def: True)
49
50
        Returns
51
        -------
52
        None
53
54
        """
55
        # Classifier choices
56 1
        self.l2 = l2
57 1
        self.loss = 'logistic'
58 1
        self.transfer_model = transfer_model
59
60
        # Optimization parameters
61 1
        self.max_iter = max_iter
62 1
        self.tolerance = tolerance
63
64
        # Whether model has been trained
65 1
        self.is_trained = False
66
67
        # Dimensionality of training data
68 1
        self.train_data_dim = 0
69
70
        # Classifier parameters
71 1
        self.theta = 0
72
73
        # Verbosity
74 1
        self.verbose = verbose
75
76 1
    def mle_transfer_dist(self, X, Z, dist='blankout'):
77
        """
78
        Maximum likelihood estimation of transfer model parameters.
79
80
        Parameters
81
        ----------
82
        X : array
83
            source data set (N samples by D features)
84
        Z : array
85
            target data set (M samples by D features)
86
        dist : str
87
            distribution of transfer model, options are 'blankout' or 'dropout'
88
            (def: 'blankout')
89
90
        Returns
91
        -------
92
        iota : array
93
            estimated transfer model parameters (D features by 1)
94
95
        """
96
        # Data shapes
97 1
        N, DX = X.shape
98 1
        M, DZ = Z.shape
99
100
        # Assert equivalent dimensionalities
101 1
        if not DX == DZ:
102
            raise ValueError('Dimensionalities of X and Z should be equal.')
103
104
        # Blankout and dropout have same maximum likelihood estimator
105 1
        if (dist == 'blankout') or (dist == 'dropout'):
106
107
            # Rate parameters
108 1
            eta = np.mean(X > 0, axis=0)
109 1
            zeta = np.mean(Z > 0, axis=0)
110
111
            # Ratio of rate parameters
112 1
            iota = np.clip(1 - zeta / eta, 0, None)
113
114
        else:
115
            raise ValueError('Distribution unknown.')
116
117 1
        return iota
118
119 1
    def moments_transfer_model(self, X, iota, dist='blankout'):
120
        """
121
        Moments of the transfer model.
122
123
        Parameters
124
        ----------
125
        X : array
126
            data set (N samples by D features)
127
        iota : array
128
            transfer model parameters (D samples by 1)
129
        dist : str
130
            transfer model, options are 'dropout' and 'blankout'
131
            (def: 'blankout')
132
133
        Returns
134
        -------
135
        E : array
136
            expected value of transfer model (N samples by D feautures)
137
        V : array
138
            variance of transfer model (D features by D features by N samples)
139
140
        """
141
        # Data shape
142 1
        N, D = X.shape
143
144 1
        if (dist == 'dropout'):
145
146
            # First moment of transfer distribution
147
            E = (1-iota) * X
148
149
            # Second moment of transfer distribution
150
            V = np.zeros((D, D, N))
151
            for i in range(N):
152
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T*X[i, :])
153
154 1
        elif (dist == 'blankout'):
155
156
            # First moment of transfer distribution
157 1
            E = X
158
159
            # Second moment of transfer distribution
160 1
            V = np.zeros((D, D, N))
161 1
            for i in range(N):
162 1
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T * X[i, :])
163
164
        else:
165
            raise NotImplementedError('Transfer distribution not implemented')
166
167 1
        return E, V
168
169 1
    def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
170
        """
171
        Compute average loss for flda-log.
172
173
        Parameters
174
        ----------
175
        theta : array
176
            classifier parameters (D features by 1)
177
        X : array
178
            source data set (N samples by D features)
179
        y : array
180
            label vector (N samples by 1)
181
        E : array
182
            expected value with respect to transfer model (N samples by
183
            D features)
184
        V : array
185
            variance with respect to transfer model (D features by D features
186
            by N samples)
187
        l2 : float
188
            regularization parameter (def: 0.0)
189
190
        Returns
191
        -------
192
        dL : array
193
            Value of loss function.
194
195
        """
196
        # Data shape
197 1
        N, D = X.shape
198
199
        # Assert y in {-1,+1}
200 1
        if not np.all(np.sort(np.unique(y)) == (-1, 1)):
201
            raise NotImplementedError('Labels can only be {-1, +1} for now.')
202
203
        # Precompute terms
204 1
        Xt = np.dot(X, theta)
205 1
        Et = np.dot(E, theta)
206 1
        alpha = np.exp(Xt) + np.exp(-Xt)
207 1
        beta = np.exp(Xt) - np.exp(-Xt)
208 1
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
209 1
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
210
211
        # Log-partition function
212 1
        A = np.log(alpha)
213
214
        # First-order partial derivative of log-partition w.r.t. Xt
215 1
        dA = beta / alpha
216
217
        # Second-order partial derivative of log-partition w.r.t. Xt
218 1
        d2A = 1 - beta**2 / alpha**2
219
220
        # Compute pointwise loss (negative log-likelihood)
221 1
        L = np.zeros((N, 1))
222 1
        for i in range(N):
223 1
            L[i] = -y[i] * Et[i] + A[i] + dA[i] * (Et[i] - Xt[i]) + \
224
                   1./2*d2A[i]*np.dot(np.dot(theta.T, V[:, :, i]), theta)
225
226
        # Compute risk (average loss)
227 1
        R = np.mean(L, axis=0)
228
229
        # Add regularization
230 1
        return R + l2*np.sum(theta**2, axis=0)
231
232 1
    def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
233
        """
234
        Compute gradient with respect to theta for flda-log.
235
236
        Parameters
237
        ----------
238
        theta : array
239
            classifier parameters (D features by 1)
240
        X : array
241
            source data set (N samples by D features)
242
        y : array
243
            label vector (N samples by 1)
244
        E : array
245
            expected value with respect to transfer model (N samples by
246
            D features)
247
        V : array
248
            variance with respect to transfer model (D features by D features
249
            by N samples)
250
        l2 : float
251
            regularization parameter (def: 0.0)
252
253
        Returns
254
        -------
255
        dR : array
256
            Value of gradient.
257
258
        """
259
        # Data shape
260 1
        N, D = X.shape
261
262
        # Assert y in {-1,+1}
263 1
        if not np.all(np.sort(np.unique(y)) == (-1, 1)):
264
            raise NotImplementedError('Labels can only be {-1, +1} for now.')
265
266
        # Precompute common terms
267 1
        Xt = np.dot(X, theta)
268 1
        Et = np.dot(E, theta)
269 1
        alpha = np.exp(Xt) + np.exp(-Xt)
270 1
        beta = np.exp(Xt) - np.exp(-Xt)
271 1
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
272 1
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
273
274
        # Log-partition function
275 1
        A = np.log(alpha)
276
277
        # First-order partial derivative of log-partition w.r.t. Xt
278 1
        dA = beta / alpha
279
280
        # Second-order partial derivative of log-partition w.r.t. Xt
281 1
        d2A = 1 - beta**2 / alpha**2
282
283 1
        dR = 0
284 1
        for i in range(N):
285
286
            # Compute gradient terms
287 1
            t1 = -y[i]*E[i, :].T
288
289 1
            t2 = beta[i] / alpha[i] * X[i, :].T
290
291 1
            t3 = (gamma[i, :] / alpha[i] - beta[i]*delta[i, :] /
292
                  alpha[i]**2).T * (Et[i] - Xt[i])
293
294 1
            t4 = beta[i] / alpha[i] * (E[i, :] - X[i, :]).T
295
296 1
            t5 = (1 - beta[i]**2 / alpha[i]**2) * np.dot(V[:, :, i], theta)
297
298 1
            t6 = -(beta[i] * gamma[i, :] / alpha[i]**2 - beta[i]**2 *
299
                   delta[i, :] / alpha[i]**3).T * np.dot(np.dot(theta.T,
300
                                                         V[:, :, i]), theta)
301
302 1
            dR += t1 + t2 + t3 + t4 + t5 + t6
303
304
        # Add regularization
305 1
        dR += l2*2*theta
306
307 1
        return dR
308
309 1
    def fit(self, X, y, Z):
310
        """
311
        Fit/train a robust bias-aware classifier.
312
313
        Parameters
314
        ----------
315
        X : array
316
            source data (N samples by D features)
317
        y : array
318
            source labels (N samples by 1)
319
        Z : array
320
            target data (M samples by D features)
321
322
        Returns
323
        -------
324
        None
325
326
        """
327
        # Data shapes
328 1
        N, DX = X.shape
329 1
        M, DZ = Z.shape
330
331
        # Assert equivalent dimensionalities
332 1
        if not DX == DZ:
333
            raise ValueError('Dimensionalities of X and Z should be equal.')
334
335
        # Map to one-not-encoding
336 1
        Y, labels = one_hot(y, one_not=True)
337
338
        # Number of classes
339 1
        K = len(labels)
340
341
        # Compute transfer distribution parameters
342 1
        iota = self.mle_transfer_dist(X, Z)
343
344
        # Compute moments of transfer distribution
345 1
        E, V = self.moments_transfer_model(X, iota)
346
347
        # Select loss function
348 1
        if (self.loss == 'logistic'):
349
350
            # Preallocate parameter array
351 1
            theta = np.random.randn(DX, K)
352
353
            # Train a classifier for each class
354 1
            for k in range(K):
355
356
                # Shorthand for loss computation
357 1
                def L(theta): return self.flda_log_loss(theta, X, Y[:, k],
358
                                                        E, V, l2=self.l2)
359
360
                # Shorthand for gradient computation
361 1
                def J(theta): return self.flda_log_grad(theta, X, Y[:, k],
362
                                                        E, V, l2=self.l2)
363
364
                # Call scipy's minimizer
365 1
                results = minimize(L, theta[:, k], jac=J, method='BFGS',
366
                                   options={'gtol': self.tolerance,
367
                                            'disp': self.verbose})
368
369
                # Store resultant classifier parameters
370 1
                theta[:, k] = results.x
371
372
        elif (self.loss == 'quadratic'):
373
374
            # Compute closed-form least-squares solution
375
            theta = np.inv(E.T*E + np.sum(V, axis=2) + l2*np.eye(D))\
376
                         * (E.T * Y)
377
378
        # Store trained classifier parameters
379 1
        self.theta = theta
380
381
        # Store classes
382 1
        self.classes = labels
383
384
        # Mark classifier as trained
385 1
        self.is_trained = True
386
387
        # Store training data dimensionality
388 1
        self.train_data_dim = DX
389
390 1
    def predict(self, Z_):
391
        """
392
        Make predictions on new dataset.
393
394
        Parameters
395
        ----------
396
        Z : array
397
            new data set (M samples by D features)
398
399
        Returns
400
        -------
401
        preds : array
402
            label predictions (M samples by 1)
403
404
        """
405
        # Data shape
406 1
        M, D = Z_.shape
407
408
        # If classifier is trained, check for same dimensionality
409 1
        if self.is_trained:
410 1
            if not self.train_data_dim == D:
411
                raise ValueError('''Test data is of different dimensionality
412
                                 than training data.''')
413
414
        # Predict target labels
415 1
        preds = np.argmax(np.dot(Z_, self.theta), axis=1)
416
417
        # Map predictions back to labels
418 1
        preds = self.classes[preds]
419
420
        # Return predictions array
421 1
        return preds
422
423 1
    def get_params(self):
424
        """Get classifier parameters."""
425
        return self.clf.get_params()
426
427 1
    def is_trained(self):
428
        """Check whether classifier is trained."""
429
        return self.is_trained
430