Completed
Push — master ( 582254...17fb6a )
by Wouter
03:58
created

FeatureLevelDomainAdaptiveClassifier.predict()   B

Complexity

Conditions 3

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3.0175

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 3
c 2
b 0
f 0
dl 0
loc 24
ccs 7
cts 8
cp 0.875
crap 3.0175
rs 8.9713
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4 1
import numpy as np
5 1
import scipy.stats as st
6 1
from scipy.optimize import minimize
7 1
from scipy.sparse.linalg import eigs
8 1
from scipy.spatial.distance import cdist
9 1
import sklearn as sk
10 1
from sklearn.svm import LinearSVC
11 1
from sklearn.linear_model import LogisticRegression, LinearRegression
12 1
from sklearn.model_selection import cross_val_predict
13 1
from os.path import basename
14
15 1
from .util import is_pos_def, one_hot
16
17
18 1
class FeatureLevelDomainAdaptiveClassifier(object):
19
    """
20
    Class of feature-level domain-adaptive classifiers.
21
22
    Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
23
    domain adaptation. JMLR.
24
25
    Methods contain training and prediction functions.
26
    """
27
28 1
    def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
29
                 max_iter=100, tolerance=1e-5, verbose=True):
30
        """
31
        Set classifier instance parameters.
32
33
        INPUT   (1) float 'l2': l2-regularization parameter value (def:0.01)
34
                (2) str 'loss': loss function for classifier, options are
35
                    'logistic' or 'quadratic' (def: 'logistic')
36
                (3) str 'transfer_model': distribution to use for transfer
37
                    model, options are 'dropout' and 'blankout'
38
                    (def: 'blankout')
39
                (4) int 'max_iter': maximum number of iterations (def: 100)
40
                (5) float 'tolerance': convergence criterion threshold on x
41
                    (def: 1e-5)
42
                (7) boolean 'verbose': report training progress (def: True)
43
        OUTPUT  None
44
        """
45
        # Classifier choices
46 1
        self.l2 = l2
47 1
        self.loss = 'logistic'
48 1
        self.transfer_model = transfer_model
49
50
        # Optimization parameters
51 1
        self.max_iter = max_iter
52 1
        self.tolerance = tolerance
53
54
        # Whether model has been trained
55 1
        self.is_trained = False
56
57
        # Dimensionality of training data
58 1
        self.train_data_dim = 0
59
60
        # Classifier parameters
61 1
        self.theta = 0
62
63
        # Verbosity
64 1
        self.verbose = verbose
65
66 1
    def mle_transfer_dist(self, X, Z, dist='blankout'):
67
        """
68
        Maximum likelihood estimation of transfer model parameters.
69
70
        INPUT   (1) array 'X': source data set (N samples by D features)
71
                (2) array 'Z': target data set (M samples by D features)
72
                (3) str 'dist': distribution of transfer model, options are
73
                    'blankout' or 'dropout' (def: 'blankout')
74
        OUTPUT  (1) array 'iota': estimated transfer model parameters
75
                    (D features by 1)
76
        """
77
        # Data shapes
78 1
        N, DX = X.shape
79 1
        M, DZ = Z.shape
80
81
        # Assert equivalent dimensionalities
82 1
        if not DX == DZ:
83
            raise ValueError('Dimensionalities of X and Z should be equal.')
84
85
        # Blankout and dropout have same maximum likelihood estimator
86 1
        if (dist == 'blankout') or (dist == 'dropout'):
87
88
            # Rate parameters
89 1
            eta = np.mean(X > 0, axis=0)
90 1
            zeta = np.mean(Z > 0, axis=0)
91
92
            # Ratio of rate parameters
93 1
            iota = np.clip(1 - zeta / eta, 0, None)
94
95
        else:
96
            raise ValueError('Distribution unknown.')
97
98 1
        return iota
99
100 1
    def moments_transfer_model(self, X, iota, dist='blankout'):
101
        """
102
        Moments of the transfer model.
103
104
        INPUT   (1) array 'X': data set (N samples by D features)
105
                (2) array 'iota': transfer model parameters (D samples by 1)
106
                (3) str 'dist': transfer model, options are 'dropout' and
107
                    'blankout' (def: 'blankout')
108
        OUTPUT  (1) array 'E': expected value of transfer model (N samples by
109
                    D feautures)
110
                (2) array 'V': variance of transfer model (D features by D
111
                    features by N samples)
112
        """
113
        # Data shape
114 1
        N, D = X.shape
115
116 1
        if (dist == 'dropout'):
117
118
            # First moment of transfer distribution
119
            E = (1-iota) * X
120
121
            # Second moment of transfer distribution
122
            V = np.zeros((D, D, N))
123
            for i in range(N):
124
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T*X[i, :])
125
126 1
        elif (dist == 'blankout'):
127
128
            # First moment of transfer distribution
129 1
            E = X
130
131
            # Second moment of transfer distribution
132 1
            V = np.zeros((D, D, N))
133 1
            for i in range(N):
134 1
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T * X[i, :])
135
136
        else:
137
            raise ValueError('Transfer distribution not implemented')
138
139 1
        return E, V
140
141 1
    def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
142
        """
143
        Compute average loss for flda-log.
144
145
        INPUT   (1) array 'theta': classifier parameters (D features by 1)
146
                (2) array 'X': source data set ()
147
                (3) array 'y': label vector (N samples by 1)
148
                (4) array 'E': expected value with respect to transfer model
149
                    (N samples by D features)
150
                (5) array 'V': variance with respect to transfer model
151
                    (D features by D features by N samples)
152
                (6) float 'l2': regularization parameter (def: 0.0)
153
        OUTPUT  (1) float 'L': loss function value
154
        """
155
        # Data shape
156 1
        N, D = X.shape
157
158
        # Assert y in {-1,+1}
159 1
        assert np.all(np.sort(np.unique(y)) == (-1, 1))
160
161
        # Precompute terms
162 1
        Xt = np.dot(X, theta)
163 1
        Et = np.dot(E, theta)
164 1
        alpha = np.exp(Xt) + np.exp(-Xt)
165 1
        beta = np.exp(Xt) - np.exp(-Xt)
166 1
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
167 1
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
168
169
        # Log-partition function
170 1
        A = np.log(alpha)
171
172
        # First-order partial derivative of log-partition w.r.t. Xt
173 1
        dA = beta / alpha
174
175
        # Second-order partial derivative of log-partition w.r.t. Xt
176 1
        d2A = 1 - beta**2 / alpha**2
177
178
        # Compute pointwise loss (negative log-likelihood)
179 1
        L = np.zeros((N, 1))
180 1
        for i in range(N):
181 1
            L[i] = -y[i] * Et[i] + A[i] + dA[i] * (Et[i] - Xt[i]) + \
182
                   1./2*d2A[i]*np.dot(np.dot(theta.T, V[:, :, i]), theta)
183
184
        # Compute risk (average loss)
185 1
        R = np.mean(L, axis=0)
186
187
        # Add regularization
188 1
        return R + l2*np.sum(theta**2, axis=0)
189
190 1
    def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
191
        """
192
        Compute gradient with respect to theta for flda-log.
193
194
        INPUT   (1) array 'theta': classifier parameters (D features by 1)
195
                (2) array 'X': source data set ()
196
                (3) array 'y': label vector (N samples by 1)
197
                (4) array 'E': expected value with respect to transfer model
198
                    (N samples by D features)
199
                (5) array 'V': variance with respect to transfer model
200
                    (D features by D features by N samples)
201
                (6) float 'l2': regularization parameter (def: 0.0)
202
        OUTPUT  (1) float
203
        """
204
        # Data shape
205 1
        N, D = X.shape
206
207
        # Assert y in {-1,+1}
208 1
        assert np.all(np.sort(np.unique(y)) == (-1, 1))
209
210
        # Precompute common terms
211 1
        Xt = np.dot(X, theta)
212 1
        Et = np.dot(E, theta)
213 1
        alpha = np.exp(Xt) + np.exp(-Xt)
214 1
        beta = np.exp(Xt) - np.exp(-Xt)
215 1
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
216 1
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
217
218
        # Log-partition function
219 1
        A = np.log(alpha)
220
221
        # First-order partial derivative of log-partition w.r.t. Xt
222 1
        dA = beta / alpha
223
224
        # Second-order partial derivative of log-partition w.r.t. Xt
225 1
        d2A = 1 - beta**2 / alpha**2
226
227 1
        dR = 0
228 1
        for i in range(N):
229
230
            # Compute gradient terms
231 1
            t1 = -y[i]*E[i, :].T
232
233 1
            t2 = beta[i] / alpha[i] * X[i, :].T
234
235 1
            t3 = (gamma[i, :] / alpha[i] - beta[i]*delta[i, :] /
236
                  alpha[i]**2).T * (Et[i] - Xt[i])
237
238 1
            t4 = beta[i] / alpha[i] * (E[i, :] - X[i, :]).T
239
240 1
            t5 = (1 - beta[i]**2 / alpha[i]**2) * np.dot(V[:, :, i], theta)
241
242 1
            t6 = -(beta[i] * gamma[i, :] / alpha[i]**2 - beta[i]**2 *
243
                   delta[i, :] / alpha[i]**3).T * np.dot(np.dot(theta.T,
244
                                                         V[:, :, i]), theta)
245
246 1
            dR += t1 + t2 + t3 + t4 + t5 + t6
247
248
        # Add regularization
249 1
        dR += l2*2*theta
250
251 1
        return dR
252
253 1
    def fit(self, X, y, Z):
254
        """
255
        Fit/train a robust bias-aware classifier.
256
257
        INPUT   (1) array 'X': source data (N samples by D features)
258
                (2) array 'y': source labels (N samples by 1)
259
                (3) array 'Z': target data (M samples by D features)
260
        OUTPUT  None
261
        """
262
        # Data shapes
263 1
        N, DX = X.shape
264 1
        M, DZ = Z.shape
265
266
        # Assert equivalent dimensionalities
267 1
        if not DX == DZ:
268
            raise ValueError('Dimensionalities of X and Z should be equal.')
269
270
        # Map to one-not-encoding
271 1
        Y, labels = one_hot(y, one_not=True)
272
273
        # Number of classes
274 1
        K = len(labels)
275
276
        # Compute transfer distribution parameters
277 1
        iota = self.mle_transfer_dist(X, Z)
278
279
        # Compute moments of transfer distribution
280 1
        E, V = self.moments_transfer_model(X, iota)
281
282
        # Select loss function
283 1
        if (self.loss == 'logistic'):
284
285
            # Preallocate parameter array
286 1
            theta = np.random.randn(DX, K)
287
288
            # Train a classifier for each class
289 1
            for k in range(K):
290
291
                # Shorthand for loss computation
292 1
                def L(theta): return self.flda_log_loss(theta, X, Y[:, k],
293
                                                        E, V, l2=self.l2)
294
295
                # Shorthand for gradient computation
296 1
                def J(theta): return self.flda_log_grad(theta, X, Y[:, k],
297
                                                        E, V, l2=self.l2)
298
299
                # Call scipy's minimizer
300 1
                results = minimize(L, theta[:, k], jac=J, method='BFGS',
301
                                   options={'gtol': self.tolerance,
302
                                            'disp': self.verbose})
303
304
                # Store resultant classifier parameters
305 1
                theta[:, k] = results.x
306
307
        elif (self.loss == 'quadratic'):
308
309
            # Compute closed-form least-squares solution
310
            theta = np.inv(E.T*E + np.sum(V, axis=2) + l2*np.eye(D))\
311
                         * (E.T * Y)
312
313
        # Store trained classifier parameters
314 1
        self.theta = theta
315
316
        # Store classes
317 1
        self.classes = labels
318
319
        # Mark classifier as trained
320 1
        self.is_trained = True
321
322
        # Store training data dimensionality
323 1
        self.train_data_dim = DX
324
325 1
    def predict(self, Z_):
326
        """
327
        Make predictions on new dataset.
328
329
        INPUT   (1) array 'Z_': new data set (M samples by D features)
330
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
331
        """
332
        # Data shape
333 1
        M, D = Z_.shape
334
335
        # If classifier is trained, check for same dimensionality
336 1
        if self.is_trained:
337 1
            assert self.train_data_dim == D
338
        else:
339
            raise UserWarning('Classifier is not trained yet.')
340
341
        # Predict target labels
342 1
        preds = np.argmax(np.dot(Z_, self.theta), axis=1)
343
344
        # Map predictions back to labels
345 1
        preds = self.classes[preds]
346
347
        # Return predictions array
348 1
        return preds
349
350 1
    def get_params(self):
351
        """Get classifier parameters."""
352
        return self.clf.get_params()
353
354 1
    def is_trained(self):
355
        """Check whether classifier is trained."""
356
        return self.is_trained
357