Completed
Branch master (f50597)
by Wouter
52s
created

FeatureLevelDomainAdaptiveClassifier   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 331
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 331
ccs 0
cts 106
cp 0
rs 10
wmc 28

11 Methods

Rating   Name   Duplication   Size   Complexity  
C fit() 0 67 7
A J() 0 2 1
A get_params() 0 3 1
B moments_transfer_model() 0 40 5
B __init__() 0 37 1
A is_trained() 0 3 1
A flda_log_loss() 0 48 3
A flda_log_grad() 0 62 3
A L() 0 2 1
A predict() 0 21 3
B mle_transfer_dist() 0 32 4
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import scipy.stats as st
from scipy.optimize import minimize
from scipy.sparse.linalg import eigs
from scipy.spatial.distance import cdist
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import is_pos_def, one_hot


class FeatureLevelDomainAdaptiveClassifier(object):
    """
    Class of feature-level domain-adaptive classifiers.

    Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
    domain adaptation. JMLR.

    Methods contain training and prediction functions.
    """

    def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
                 max_iter=100, tolerance=1e-5, verbose=True):
        """
        Set classifier instance parameters.

        INPUT   (1) float 'l2': l2-regularization parameter value (def: 0.0)
                (2) str 'loss': loss function for classifier, options are
                    'logistic' or 'quadratic' (def: 'logistic')
                (3) str 'transfer_model': distribution to use for transfer
                    model, options are 'dropout' and 'blankout'
                    (def: 'blankout')
                (4) int 'max_iter': maximum number of iterations (def: 100)
                (5) float 'tolerance': convergence criterion threshold on x
                    (def: 1e-5)
                (6) boolean 'verbose': report training progress (def: True)
        OUTPUT  None
        """
        # Classifier choices
        self.l2 = l2
        self.loss = loss
        self.transfer_model = transfer_model

        # Optimization parameters
        self.max_iter = max_iter
        self.tolerance = tolerance

        # Whether model has been trained
        self._is_trained = False

        # Dimensionality of training data
        self.train_data_dim = 0

        # Classifier parameters
        self.theta = 0

        # Verbosity
        self.verbose = verbose

    def mle_transfer_dist(self, X, Z, dist='blankout'):
        """
        Maximum likelihood estimation of transfer model parameters.

        INPUT   (1) array 'X': source data set (N samples by D features)
                (2) array 'Z': target data set (M samples by D features)
                (3) str 'dist': distribution of transfer model, options are
                    'blankout' or 'dropout' (def: 'blankout')
        OUTPUT  (1) array 'iota': estimated transfer model parameters
                    (D features by 1)
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ

        # Blankout and dropout have same maximum likelihood estimator
        if (dist == 'blankout') or (dist == 'dropout'):

            # Rate parameters
            eta = np.mean(X > 0, axis=0)
            zeta = np.mean(Z > 0, axis=0)

            # Ratio of rate parameters
            iota = np.clip(1 - zeta / eta, 0, None)

        else:
            raise ValueError('Distribution unknown.')

        return iota

    def moments_transfer_model(self, X, iota, dist='blankout'):
        """
        Moments of the transfer model.

        INPUT   (1) array 'X': data set (N samples by D features)
                (2) array 'iota': transfer model parameters (D features by 1)
                (3) str 'dist': transfer model, options are 'dropout' and
                    'blankout' (def: 'blankout')
        OUTPUT  (1) array 'E': expected value of transfer model (N samples by
                    D features)
                (2) array 'V': variance of transfer model (D features by D
                    features by N samples)
        """
        # Data shape
        N, D = X.shape

        if (dist == 'dropout'):

            # First moment of transfer distribution
            E = (1-iota) * X

            # Second moment of transfer distribution
            V = np.zeros((D, D, N))
            for i in range(N):
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T*X[i, :])

        elif (dist == 'blankout'):

            # First moment of transfer distribution
            E = X

            # Second moment of transfer distribution
            V = np.zeros((D, D, N))
            for i in range(N):
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T * X[i, :])

        else:
            raise ValueError('Transfer distribution not implemented')

        return E, V

    def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
        """
        Compute average loss for flda-log.

        INPUT   (1) array 'theta': classifier parameters (D features by 1)
                (2) array 'X': source data set (N samples by D features)
                (3) array 'y': label vector (N samples by 1)
                (4) array 'E': expected value with respect to transfer model
                    (N samples by D features)
                (5) array 'V': variance with respect to transfer model
                    (D features by D features by N samples)
                (6) float 'l2': regularization parameter (def: 0.0)
        OUTPUT  (1) float 'L': loss function value
        """
        # Data shape
        N, D = X.shape

        # Assert y in {-1,+1}
        assert np.all(np.sort(np.unique(y)) == (-1, 1))

        # Precompute terms
        Xt = np.dot(X, theta)
        Et = np.dot(E, theta)
        alpha = np.exp(Xt) + np.exp(-Xt)
        beta = np.exp(Xt) - np.exp(-Xt)
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T

        # Log-partition function
        A = np.log(alpha)

        # First-order partial derivative of log-partition w.r.t. Xt
        dA = beta / alpha

        # Second-order partial derivative of log-partition w.r.t. Xt
        d2A = 1 - beta**2 / alpha**2

        # Compute pointwise loss (negative log-likelihood)
        L = np.zeros((N, 1))
        for i in range(N):
            L[i] = -y[i] * Et[i] + A[i] + dA[i] * (Et[i] - Xt[i]) + \
                   1./2*d2A[i]*np.dot(np.dot(theta.T, V[:, :, i]), theta)

        # Compute risk (average loss)
        R = np.mean(L, axis=0)

        # Add regularization
        return R + l2*np.sum(theta**2, axis=0)

    def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
        """
        Compute gradient with respect to theta for flda-log.

        INPUT   (1) array 'theta': classifier parameters (D features by 1)
                (2) array 'X': source data set (N samples by D features)
                (3) array 'y': label vector (N samples by 1)
                (4) array 'E': expected value with respect to transfer model
                    (N samples by D features)
                (5) array 'V': variance with respect to transfer model
                    (D features by D features by N samples)
                (6) float 'l2': regularization parameter (def: 0.0)
        OUTPUT  (1) array 'dR': gradient of the loss with respect to theta
                    (D features by 1)
        """
        # Data shape
        N, D = X.shape

        # Assert y in {-1,+1}
        assert np.all(np.sort(np.unique(y)) == (-1, 1))

        # Precompute common terms
        Xt = np.dot(X, theta)
        Et = np.dot(E, theta)
        alpha = np.exp(Xt) + np.exp(-Xt)
        beta = np.exp(Xt) - np.exp(-Xt)
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T

        # Log-partition function
        A = np.log(alpha)

        # First-order partial derivative of log-partition w.r.t. Xt
        dA = beta / alpha

        # Second-order partial derivative of log-partition w.r.t. Xt
        d2A = 1 - beta**2 / alpha**2

        dR = 0
        for i in range(N):

            # Compute gradient terms
            t1 = -y[i]*E[i, :].T

            t2 = beta[i] / alpha[i] * X[i, :].T

            t3 = (gamma[i, :] / alpha[i] - beta[i]*delta[i, :] /
                  alpha[i]**2).T * (Et[i] - Xt[i])

            t4 = beta[i] / alpha[i] * (E[i, :] - X[i, :]).T

            t5 = (1 - beta[i]**2 / alpha[i]**2) * np.dot(V[:, :, i], theta)

            t6 = -(beta[i] * gamma[i, :] / alpha[i]**2 - beta[i]**2 *
                   delta[i, :] / alpha[i]**3).T * np.dot(np.dot(theta.T,
                                                         V[:, :, i]), theta)

            dR += t1 + t2 + t3 + t4 + t5 + t6

        # Add regularization
        dR += l2*2*theta

        return dR

    def fit(self, X, y, Z):
        """
        Fit/train a feature-level domain-adaptive classifier.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ

        # Number of classes
        K = len(np.unique(y))

        # Map labels to a one-vs-rest (+1/-1) encoding
        Y = one_hot(y, one_not=True)

        # Compute transfer distribution parameters
        iota = self.mle_transfer_dist(X, Z)

        # Compute moments of transfer distribution
        E, V = self.moments_transfer_model(X, iota)

        # Select loss function
        if (self.loss == 'logistic'):

            # Preallocate parameter array
            theta = np.random.randn(DX, K)

            # Train a classifier for each class
            for k in range(K):

                # Shorthand for loss computation
                def L(theta): return self.flda_log_loss(theta, X, Y[:, k],
                                                        E, V, l2=self.l2)

                # Shorthand for gradient computation
                def J(theta): return self.flda_log_grad(theta, X, Y[:, k],
                                                        E, V, l2=self.l2)

                # Call scipy's minimizer
                results = minimize(L, theta[:, k], jac=J, method='BFGS',
                                   options={'gtol': self.tolerance,
                                            'disp': self.verbose})

                # Store resultant classifier parameters
                theta[:, k] = results.x

        elif (self.loss == 'quadratic'):

            # Compute closed-form least-squares solution
            theta = np.dot(np.linalg.inv(np.dot(E.T, E) + np.sum(V, axis=2) +
                                         self.l2*np.eye(DX)), np.dot(E.T, Y))

        else:
            raise ValueError('Loss function not implemented')

        # Store trained classifier parameters
        self.theta = theta

        # Mark classifier as trained
        self._is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        """
        # Data shape
        M, D = Z_.shape

        # If classifier is trained, check for same dimensionality
        if self._is_trained:
            assert self.train_data_dim == D
        else:
            raise UserWarning('Classifier is not trained yet.')

        # Predict target labels
        preds = np.argmax(np.dot(Z_, self.theta), axis=1)

        # Return predictions array
        return preds

    def get_params(self):
        """Get trained classifier parameters."""
        return self.theta

    def is_trained(self):
        """Check whether classifier is trained."""
        return self._is_trained
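
Below is a minimal usage sketch, not part of the reviewed file. It assumes the class is importable from its package (the commented import path is hypothetical) and uses randomly generated toy data; the only hard requirement is that source and target sets share the same feature dimensionality.

import numpy as np
# from libtlda.flda import FeatureLevelDomainAdaptiveClassifier  # hypothetical import path

# Toy source data (N x D), source labels, and unlabeled target data (M x D)
X = np.random.randn(100, 10)
y = np.random.choice([0, 1], size=100)
Z = np.random.randn(50, 10)

# Fit with the blankout transfer model and logistic loss, then predict on target data
clf = FeatureLevelDomainAdaptiveClassifier(l2=0.1, loss='logistic',
                                           transfer_model='blankout')
clf.fit(X, y, Z)
preds = clf.predict(Z)  # class indices, one per target sample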