#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import scipy.stats as st
from scipy.optimize import minimize
from scipy.sparse.linalg import eigs
from scipy.spatial.distance import cdist
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import is_pos_def, one_hot


class FeatureLevelDomainAdaptiveClassifier(object):
    """
    Class of feature-level domain-adaptive classifiers.

    Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
    domain adaptation. JMLR.

    Methods contain training and prediction functions.
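
    Example (illustrative sketch, not from the original documentation;
    assumes `X` and `Z` are numpy arrays of shape (N, D) and (M, D), and
    `y` holds the N source labels):

    >>> clf = FeatureLevelDomainAdaptiveClassifier(l2=0.1, loss='logistic',
    ...                                            transfer_model='blankout')
    >>> clf.fit(X, y, Z)
    >>> preds = clf.predict(Z)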
    """

    def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
                 max_iter=100, tolerance=1e-5, verbose=True):
        """
        Set classifier instance parameters.

        INPUT   (1) float 'l2': l2-regularization parameter value (def: 0.0)
                (2) str 'loss': loss function for classifier, options are
                    'logistic' or 'quadratic' (def: 'logistic')
                (3) str 'transfer_model': distribution to use for transfer
                    model, options are 'dropout' and 'blankout'
                    (def: 'blankout')
                (4) int 'max_iter': maximum number of iterations (def: 100)
                (5) float 'tolerance': convergence criterion threshold on x
                    (def: 1e-5)
                (6) boolean 'verbose': report training progress (def: True)
        OUTPUT  None
        """
        # Classifier choices
        self.l2 = l2
        self.loss = loss
        self.transfer_model = transfer_model

        # Optimization parameters
        self.max_iter = max_iter
        self.tolerance = tolerance

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = 0

        # Classifier parameters
        self.theta = 0

        # Verbosity
        self.verbose = verbose

    def mle_transfer_dist(self, X, Z, dist='blankout'):
        """
        Maximum likelihood estimation of transfer model parameters.

        INPUT   (1) array 'X': source data set (N samples by D features)
                (2) array 'Z': target data set (M samples by D features)
                (3) str 'dist': distribution of transfer model, options are
                    'blankout' or 'dropout' (def: 'blankout')
        OUTPUT  (1) array 'iota': estimated transfer model parameters
                    (D features by 1)
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ

        # Blankout and dropout have same maximum likelihood estimator
        if (dist == 'blankout') or (dist == 'dropout'):

            # Rate parameters
            eta = np.mean(X > 0, axis=0)
            zeta = np.mean(Z > 0, axis=0)

            # Ratio of rate parameters
            iota = np.clip(1 - zeta / eta, 0, None)
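            # eta and zeta are the empirical probabilities of a feature being
            # nonzero in the source and target data. Under dropout/blankout
            # corruption with rate iota, the target nonzero-rate is
            # (1 - iota) * eta, so solving zeta = (1 - iota) * eta yields
            # iota = 1 - zeta / eta, clipped at zero to keep a valid rate.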

        else:
            raise ValueError('Distribution unknown.')

        return iota

    def moments_transfer_model(self, X, iota, dist='blankout'):
        """
        Moments of the transfer model.

        INPUT   (1) array 'X': data set (N samples by D features)
                (2) array 'iota': transfer model parameters (D features by 1)
                (3) str 'dist': transfer model, options are 'dropout' and
                    'blankout' (def: 'blankout')
        OUTPUT  (1) array 'E': expected value of transfer model (N samples by
                    D features)
                (2) array 'V': variance of transfer model (D features by D
                    features by N samples)
        """
        # Data shape
        N, D = X.shape

        if (dist == 'dropout'):

            # First moment of transfer distribution
            E = (1-iota) * X

            # Second moment of transfer distribution
            V = np.zeros((D, D, N))
            for i in range(N):
                V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T*X[i, :])

        elif (dist == 'blankout'):

            # First moment of transfer distribution
            E = X

            # Second moment of transfer distribution
            V = np.zeros((D, D, N))
            for i in range(N):
                V[:, :, i] = np.diag(iota * (1+iota)) * (X[i, :].T * X[i, :])

        else:
            raise ValueError('Transfer distribution not implemented')

        return E, V

    def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
        """
        Compute average loss for flda-log.

        INPUT   (1) array 'theta': classifier parameters (D features by 1)
                (2) array 'X': source data set (N samples by D features)
                (3) array 'y': label vector (N samples by 1)
                (4) array 'E': expected value with respect to transfer model
                    (N samples by D features)
                (5) array 'V': variance with respect to transfer model
                    (D features by D features by N samples)
                (6) float 'l2': regularization parameter (def: 0.0)
        OUTPUT  (1) float 'L': loss function value
        """
        # Data shape
        N, D = X.shape

        # Assert y in {-1,+1}
        assert np.all(np.sort(np.unique(y)) == (-1, 1))

        # Precompute terms
        Xt = np.dot(X, theta)
        Et = np.dot(E, theta)
        alpha = np.exp(Xt) + np.exp(-Xt)
        beta = np.exp(Xt) - np.exp(-Xt)
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T

        # Log-partition function
        A = np.log(alpha)

        # First-order partial derivative of log-partition w.r.t. Xt
        dA = beta / alpha

        # Second-order partial derivative of log-partition w.r.t. Xt
        d2A = 1 - beta**2 / alpha**2

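        # The per-sample loss below is the expected logistic loss under the
        # transfer model, approximated by a second-order Taylor expansion of
        # the log-partition function A around the observed score Xt:
        #   L_i ~= -y_i*Et_i + A_i + dA_i*(Et_i - Xt_i)
        #          + 1/2 * d2A_i * theta' V_i theta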
        # Compute pointwise loss (negative log-likelihood)
        L = np.zeros((N, 1))
        for i in range(N):
            L[i] = -y[i] * Et[i] + A[i] + dA[i] * (Et[i] - Xt[i]) + \
                1./2*d2A[i]*np.dot(np.dot(theta.T, V[:, :, i]), theta)

        # Compute risk (average loss)
        R = np.mean(L, axis=0)

        # Add regularization
        return R + l2*np.sum(theta**2, axis=0)

    def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
        """
        Compute gradient with respect to theta for flda-log.

        INPUT   (1) array 'theta': classifier parameters (D features by 1)
                (2) array 'X': source data set (N samples by D features)
                (3) array 'y': label vector (N samples by 1)
                (4) array 'E': expected value with respect to transfer model
                    (N samples by D features)
                (5) array 'V': variance with respect to transfer model
                    (D features by D features by N samples)
                (6) float 'l2': regularization parameter (def: 0.0)
        OUTPUT  (1) array 'dR': gradient of the loss with respect to theta
                    (D features by 1)
        """
        # Data shape
        N, D = X.shape

        # Assert y in {-1,+1}
        assert np.all(np.sort(np.unique(y)) == (-1, 1))

        # Precompute common terms
        Xt = np.dot(X, theta)
        Et = np.dot(E, theta)
        alpha = np.exp(Xt) + np.exp(-Xt)
        beta = np.exp(Xt) - np.exp(-Xt)
        gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
        delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T

        # Log-partition function
        A = np.log(alpha)

        # First-order partial derivative of log-partition w.r.t. Xt
        dA = beta / alpha

        # Second-order partial derivative of log-partition w.r.t. Xt
        d2A = 1 - beta**2 / alpha**2

        dR = 0
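        # Accumulate per-sample gradients. The terms t1-t6 below differentiate
        # the successive terms of the Taylor-approximated loss (see
        # flda_log_loss) with respect to theta; per sample, gamma is the
        # derivative of beta and delta is the derivative of alpha.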
        for i in range(N):

            # Compute gradient terms
            t1 = -y[i]*E[i, :].T

            t2 = beta[i] / alpha[i] * X[i, :].T

            t3 = (gamma[i, :] / alpha[i] - beta[i]*delta[i, :] /
                  alpha[i]**2).T * (Et[i] - Xt[i])

            t4 = beta[i] / alpha[i] * (E[i, :] - X[i, :]).T

            t5 = (1 - beta[i]**2 / alpha[i]**2) * np.dot(V[:, :, i], theta)

            t6 = -(beta[i] * gamma[i, :] / alpha[i]**2 - beta[i]**2 *
                   delta[i, :] / alpha[i]**3).T * \
                np.dot(np.dot(theta.T, V[:, :, i]), theta)

            dR += t1 + t2 + t3 + t4 + t5 + t6

        # Average over samples so the gradient matches the mean loss
        dR /= N

        # Add regularization
        dR += l2*2*theta

        return dR

    def fit(self, X, y, Z):
        """
        Fit/train a feature-level domain-adaptive classifier.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ

        # Number of classes
        K = len(np.unique(y))

        # Map to one-not encoding (one column per class)
        Y = one_hot(y, one_not=True)
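        # Note: the per-class losses below assert labels in {-1, +1}, so each
        # column Y[:, k] is assumed to hold +1 for samples of class k and -1
        # otherwise (the 'one-not' variant of one-hot encoding).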

        # Compute transfer distribution parameters
        iota = self.mle_transfer_dist(X, Z, dist=self.transfer_model)

        # Compute moments of transfer distribution
        E, V = self.moments_transfer_model(X, iota, dist=self.transfer_model)

        # Select loss function
        if (self.loss == 'logistic'):

            # Preallocate parameter array
            theta = np.random.randn(DX, K)

            # Train a classifier for each class
            for k in range(K):

                # Shorthand for loss computation
                def L(theta): return self.flda_log_loss(theta, X, Y[:, k],
                                                        E, V, l2=self.l2)

                # Shorthand for gradient computation
                def J(theta): return self.flda_log_grad(theta, X, Y[:, k],
                                                        E, V, l2=self.l2)

                # Call scipy's minimizer
                results = minimize(L, theta[:, k], jac=J, method='BFGS',
                                   options={'gtol': self.tolerance,
                                            'maxiter': self.max_iter,
                                            'disp': self.verbose})

                # Store resultant classifier parameters
                theta[:, k] = results.x

        elif (self.loss == 'quadratic'):

            # Compute closed-form least-squares solution
            theta = np.dot(np.linalg.inv(np.dot(E.T, E) + np.sum(V, axis=2) +
                                         self.l2*np.eye(DX)), np.dot(E.T, Y))
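            # These are the normal equations of the expected quadratic loss
            # under the transfer model:
            #   (E'E + sum_i V_i + l2*I) theta = E'Y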

        else:
            raise ValueError('Loss function not implemented.')

        # Store trained classifier parameters
        self.theta = theta

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        """
        # Data shape
        M, D = Z_.shape

        # If classifier is trained, check for same dimensionality
        if self.is_trained:
            assert self.train_data_dim == D
        else:
            raise UserWarning('Classifier is not trained yet.')

        # Predict target labels
        preds = np.argmax(np.dot(Z_, self.theta), axis=1)
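        # Note: argmax returns column indices 0..K-1; mapping these back to
        # the original label values depends on the column ordering produced
        # by one_hot during training.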

        # Return predictions array
        return preds

    def get_params(self):
        """Get classifier hyperparameters."""
        return {'l2': self.l2, 'loss': self.loss,
                'transfer_model': self.transfer_model,
                'max_iter': self.max_iter, 'tolerance': self.tolerance,
                'verbose': self.verbose}

    def is_trained(self):
        """Check whether classifier is trained."""
        return self.is_trained