Completed
Push — master ( f3d068...88fa67 )
by Wouter
03:44
created

flda_log_grad()   B

Complexity

Conditions 3

Size

Total Lines 76

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 23
CRAP Score 3.0006

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 3
c 2
b 0
f 0
dl 0
loc 76
ccs 23
cts 24
cp 0.9583
crap 3.0006
rs 8.9667

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4 1
import numpy as np
5 1
import scipy.stats as st
6 1
from scipy.optimize import minimize
7 1
from scipy.sparse.linalg import eigs
8 1
from scipy.spatial.distance import cdist
9 1
import sklearn as sk
10 1
from sklearn.svm import LinearSVC
11 1
from sklearn.linear_model import LogisticRegression, LinearRegression
12 1
from sklearn.model_selection import cross_val_predict
13 1
from os.path import basename
14
15 1
from .util import is_pos_def, one_hot
16
17
18 1
class FeatureLevelDomainAdaptiveClassifier(object):
19
    """
20
    Class of feature-level domain-adaptive classifiers.
21
22
    Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
23
    domain adaptation. JMLR.
24
25
    Methods contain training and prediction functions.
26
    """
27
28 1
    def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
29
                 max_iter=100, tolerance=1e-5, verbose=True):
30
        """
31
        Set classifier instance parameters.
32
33
        Parameters
34
        ----------
35
        l2 : float
36
            l2-regularization parameter value (def:0.01)
37
        loss : str
38
            loss function for classifier, options are 'logistic' or 'quadratic'
39
            (def: 'logistic')
40
        transfer_model : str
41
            distribution to use for transfer model, options are 'dropout' and
42
            'blankout' (def: 'blankout')
43
        max_iter : int
44
            maximum number of iterations (def: 100)
45
        tolerance : float
46
            convergence criterion threshold on x (def: 1e-5)
47
        verbose : bool
48
            report training progress (def: True)
49
50
        Returns
51
        -------
52
        None
53
54
        """
55
        # Classifier choices
56 1
        self.l2 = l2
57 1
        self.loss = 'logistic'
58 1
        self.transfer_model = transfer_model
59
60
        # Optimization parameters
61 1
        self.max_iter = max_iter
62 1
        self.tolerance = tolerance
63
64
        # Whether model has been trained
65 1
        self.is_trained = False
66
67
        # Dimensionality of training data
68 1
        self.train_data_dim = 0
69
70
        # Classifier parameters
71 1
        self.theta = 0
72
73
        # Verbosity
74 1
        self.verbose = verbose
75
76 1
    def mle_transfer_dist(self, X, Z, dist='blankout'):
77
        """
78
        Maximum likelihood estimation of transfer model parameters.
79
80
        Parameters
81
        ----------
82
        X : array
83
            source data set (N samples by D features)
84
        Z : array
85
            target data set (M samples by D features)
86
        dist : str
87
            distribution of transfer model, options are 'blankout' or 'dropout'
88
            (def: 'blankout')
89
90
        Returns
91
        -------
92
        iota : array
93
            estimated transfer model parameters (D features by 1)
94
95
        """
96
        # Data shapes
97 1
        N, DX = X.shape
98 1
        M, DZ = Z.shape
99
100
        # Assert equivalent dimensionalities
101 1
        if not DX == DZ:
102
            raise ValueError('Dimensionalities of X and Z should be equal.')
103
104
        # Blankout and dropout have same maximum likelihood estimator
105 1
        if (dist == 'blankout') or (dist == 'dropout'):
106
107
            # Rate parameters
108 1
            eta = np.mean(X > 0, axis=0)
109 1
            zeta = np.mean(Z > 0, axis=0)
110
111
            # Ratio of rate parameters
112 1
            iota = np.clip(1 - zeta / eta, 0, None)
113
114
        else:
115
            raise ValueError('Distribution unknown.')
116
117 1
        return iota
118
119 1
    def moments_transfer_model(self, X, iota, dist='blankout'):
120
        """
121
        Moments of the transfer model.
122
123
        Parameters
124
        ----------
125
        X : array
126
            data set (N samples by D features)
127
        iota : array
128
            transfer model parameters (D samples by 1)
129
        dist : str
130
            transfer model, options are 'dropout' and 'blankout'
131
            (def: 'blankout')
132
133
        Returns
134
        -------
135
        E : array
136
            expected value of transfer model (N samples by D feautures)
137
        V : array
138
            variance of transfer model (D features by D features by N samples)
139
140
        """
141
        # Data shape
142 1
        N, D = X.shape
143
144 1
        if (dist == 'dropout'):
145
146
            # First moment of transfer distribution
147
            E = (1-iota) * X
148
149
            # Second moment of transfer distribution
150
            V = np.zeros((D, D, N))
151
            for i in range(N):
152
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T*X[i, :])
153
154 1
        elif (dist == 'blankout'):
155
156
            # First moment of transfer distribution
157 1
            E = X
158
159
            # Second moment of transfer distribution
160 1
            V = np.zeros((D, D, N))
161 1
            for i in range(N):
162 1
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T * X[i, :])
163
164
        else:
165
            raise NotImplementedError('Transfer distribution not implemented')
166
167 1
        return E, V
168
169 1
    def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
170
        """
171
        Compute average loss for flda-log.
172
173
        Parameters
174
        ----------
175
        theta : array
176
            classifier parameters (D features by 1)
177
        X : array
178
            source data set (N samples by D features)
179
        y : array
180
            label vector (N samples by 1)
181
        E : array
182
            expected value with respect to transfer model (N samples by
183
            D features)
184
        V : array
185
            variance with respect to transfer model (D features by D features
186
            by N samples)
187
        l2 : float
188
            regularization parameter (def: 0.0)
189
190
        Returns
191
        -------
192
        dL : array
193
            Value of loss function.
194
195
        """
196
        # Data shape
197 1
        N, D = X.shape
198
199
        # Assert y in {-1,+1}
200 1
        if not np.all(np.sort(np.unique(y)) == (-1, 1)):
201
            raise NotImplementedError('Labels can only be {-1, +1} for now.')
202
203
        # Precompute terms
204 1
        Xt = np.dot(X, theta)
205 1
        Et = np.dot(E, theta)
206 1
        alpha = np.exp(Xt) + np.exp(-Xt)
207 1
        beta = np.exp(Xt) - np.exp(-Xt)
208 1
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
209 1
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
210
211
        # Log-partition function
212 1
        A = np.log(alpha)
213
214
        # First-order partial derivative of log-partition w.r.t. Xt
215 1
        dA = beta / alpha
216
217
        # Second-order partial derivative of log-partition w.r.t. Xt
218 1
        d2A = 1 - beta**2 / alpha**2
219
220
        # Compute pointwise loss (negative log-likelihood)
221 1
        L = np.zeros((N, 1))
222 1
        for i in range(N):
223 1
            L[i] = -y[i] * Et[i] + A[i] + dA[i] * (Et[i] - Xt[i]) + \
224
                   1./2*d2A[i]*np.dot(np.dot(theta.T, V[:, :, i]), theta)
225
226
        # Compute risk (average loss)
227 1
        R = np.mean(L, axis=0)
228
229
        # Add regularization
230 1
        return R + l2*np.sum(theta**2, axis=0)
231
232 1
    def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
233
        """
234
        Compute gradient with respect to theta for flda-log.
235
236
        Parameters
237
        ----------
238
        theta : array
239
            classifier parameters (D features by 1)
240
        X : array
241
            source data set (N samples by D features)
242
        y : array
243
            label vector (N samples by 1)
244
        E : array
245
            expected value with respect to transfer model (N samples by
246
            D features)
247
        V : array
248
            variance with respect to transfer model (D features by D features
249
            by N samples)
250
        l2 : float
251
            regularization parameter (def: 0.0)
252
253
        Returns
254
        -------
255
        dR : array
256
            Value of gradient.
257
258
        """
259
        # Data shape
260 1
        N, D = X.shape
261
262
        # Assert y in {-1,+1}
263 1
        if not np.all(np.sort(np.unique(y)) == (-1, 1)):
264
            raise NotImplementedError('Labels can only be {-1, +1} for now.')
265
266
        # Precompute common terms
267 1
        Xt = np.dot(X, theta)
268 1
        Et = np.dot(E, theta)
269 1
        alpha = np.exp(Xt) + np.exp(-Xt)
270 1
        beta = np.exp(Xt) - np.exp(-Xt)
271 1
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
272 1
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
273
274
        # Log-partition function
275 1
        A = np.log(alpha)
276
277
        # First-order partial derivative of log-partition w.r.t. Xt
278 1
        dA = beta / alpha
279
280
        # Second-order partial derivative of log-partition w.r.t. Xt
281 1
        d2A = 1 - beta**2 / alpha**2
282
283 1
        dR = 0
284 1
        for i in range(N):
285
286
            # Compute gradient terms
287 1
            t1 = -y[i]*E[i, :].T
288
289 1
            t2 = beta[i] / alpha[i] * X[i, :].T
290
291 1
            t3 = (gamma[i, :] / alpha[i] - beta[i]*delta[i, :] /
292
                  alpha[i]**2).T * (Et[i] - Xt[i])
293
294 1
            t4 = beta[i] / alpha[i] * (E[i, :] - X[i, :]).T
295
296 1
            t5 = (1 - beta[i]**2 / alpha[i]**2) * np.dot(V[:, :, i], theta)
297
298 1
            t6 = -(beta[i] * gamma[i, :] / alpha[i]**2 - beta[i]**2 *
299
                   delta[i, :] / alpha[i]**3).T * np.dot(np.dot(theta.T,
300
                                                         V[:, :, i]), theta)
301
302 1
            dR += t1 + t2 + t3 + t4 + t5 + t6
303
304
        # Add regularization
305 1
        dR += l2*2*theta
306
307 1
        return dR
308
309 1
    def fit(self, X, y, Z):
310
        """
311
        Fit/train a robust bias-aware classifier.
312
313
        Parameters
314
        ----------
315
        X : array
316
            source data (N samples by D features)
317
        y : array
318
            source labels (N samples by 1)
319
        Z : array
320
            target data (M samples by D features)
321
322
        Returns
323
        -------
324
        None
325
326
        """
327
        # Data shapes
328 1
        N, DX = X.shape
329 1
        M, DZ = Z.shape
330
331
        # Assert equivalent dimensionalities
332 1
        if not DX == DZ:
333
            raise ValueError('Dimensionalities of X and Z should be equal.')
334
335
        # Map to one-not-encoding
336 1
        Y, labels = one_hot(y, one_not=True)
337
338
        # Number of classes
339 1
        K = len(labels)
340
341
        # Compute transfer distribution parameters
342 1
        iota = self.mle_transfer_dist(X, Z)
343
344
        # Compute moments of transfer distribution
345 1
        E, V = self.moments_transfer_model(X, iota)
346
347
        # Select loss function
348 1
        if (self.loss == 'logistic'):
349
350
            # Preallocate parameter array
351 1
            theta = np.random.randn(DX, K)
352
353
            # Train a classifier for each class
354 1
            for k in range(K):
355
356
                # Shorthand for loss computation
357 1
                def L(theta): return self.flda_log_loss(theta, X, Y[:, k],
358
                                                        E, V, l2=self.l2)
359
360
                # Shorthand for gradient computation
361 1
                def J(theta): return self.flda_log_grad(theta, X, Y[:, k],
362
                                                        E, V, l2=self.l2)
363
364
                # Call scipy's minimizer
365 1
                results = minimize(L, theta[:, k], jac=J, method='BFGS',
366
                                   options={'gtol': self.tolerance,
367
                                            'disp': self.verbose})
368
369
                # Store resultant classifier parameters
370 1
                theta[:, k] = results.x
371
372
        elif (self.loss == 'quadratic'):
373
374
            # Compute closed-form least-squares solution
375
            theta = np.inv(E.T*E + np.sum(V, axis=2) + l2*np.eye(D))\
376
                         * (E.T * Y)
377
378
        # Store trained classifier parameters
379 1
        self.theta = theta
380
381
        # Store classes
382 1
        self.classes = labels
383
384
        # Mark classifier as trained
385 1
        self.is_trained = True
386
387
        # Store training data dimensionality
388 1
        self.train_data_dim = DX
389
390 1
    def predict(self, Z_):
391
        """
392
        Make predictions on new dataset.
393
394
        Parameters
395
        ----------
396
        Z : array
397
            new data set (M samples by D features)
398
399
        Returns
400
        -------
401
        preds : array
402
            label predictions (M samples by 1)
403
404
        """
405
        # Data shape
406 1
        M, D = Z_.shape
407
408
        # If classifier is trained, check for same dimensionality
409 1
        if self.is_trained:
410 1
            if not self.train_data_dim == D:
411
                raise ValueError('''Test data is of different dimensionality
412
                                 than training data.''')
413
414
        # Predict target labels
415 1
        preds = np.argmax(np.dot(Z_, self.theta), axis=1)
416
417
        # Map predictions back to labels
418 1
        preds = self.classes[preds]
419
420
        # Return predictions array
421 1
        return preds
422
423 1
    def get_params(self):
424
        """Get classifier parameters."""
425
        return self.clf.get_params()
426
427 1
    def is_trained(self):
428
        """Check whether classifier is trained."""
429
        return self.is_trained
430