FeatureLevelDomainAdaptiveClassifier.predict() - Code Metrics - Inspection of "Added more tests and added cllasses output to one_..." - wmkouw/libTLDA - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 582254...17fb6a )

by Wouter

created 2018-06-14 07:40 UTC

FeatureLevelDomainAdaptiveClassifier.predict() B

↳ Parent: FeatureLevelDomainAdaptiveClassifier

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	7
CRAP Score	3.0175

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
cc	3
c	2
b	0
f	0
dl	0
loc	24
ccs	7
cts	8
cp	0.875
crap	3.0175
rs	8.9713

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import scipy.stats as st
from scipy.optimize import minimize
from scipy.sparse.linalg import eigs
from scipy.spatial.distance import cdist
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import is_pos_def, one_hot


class FeatureLevelDomainAdaptiveClassifier(object):
    """
    Class of feature-level domain-adaptive classifiers.

    Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
    domain adaptation. JMLR.

    Methods contain training and prediction functions.
    """

    def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
                 max_iter=100, tolerance=1e-5, verbose=True):
        """
        Set classifier instance parameters.

        INPUT   (1) float 'l2': l2-regularization parameter value (def: 0.0)
                (2) str 'loss': loss function for classifier, options are
                    'logistic' or 'quadratic' (def: 'logistic')
                (3) str 'transfer_model': distribution to use for transfer
                    model, options are 'dropout' and 'blankout'
                    (def: 'blankout')
                (4) int 'max_iter': maximum number of iterations (def: 100)
                (5) float 'tolerance': convergence criterion threshold on
                    the gradient norm of the optimizer (def: 1e-5)
                (6) boolean 'verbose': report training progress (def: True)
        OUTPUT  None
        """
        # Classifier choices (validated in fit, where they are used)
        self.l2 = l2
        self.loss = loss
        self.transfer_model = transfer_model

        # Optimization parameters
        self.max_iter = max_iter
        self.tolerance = tolerance

        # Whether model has been trained. NOTE: this instance attribute
        # shadows the 'is_trained' method defined at the end of the class.
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = 0

        # Classifier parameters
        self.theta = 0

        # Verbosity
        self.verbose = verbose

    @staticmethod
    def _check_binary_labels(y):
        """Raise ValueError unless the values in 'y' are exactly {-1,+1}."""
        # np.unique returns sorted values; array_equal also copes with a
        # differing number of unique values instead of failing to broadcast.
        if not np.array_equal(np.unique(y), (-1, 1)):
            raise ValueError('Labels should be in {-1,+1}.')

    def mle_transfer_dist(self, X, Z, dist='blankout'):
        """
        Maximum likelihood estimation of transfer model parameters.

        INPUT   (1) array 'X': source data set (N samples by D features)
                (2) array 'Z': target data set (M samples by D features)
                (3) str 'dist': distribution of transfer model, options are
                    'blankout' or 'dropout' (def: 'blankout')
        OUTPUT  (1) array 'iota': estimated transfer model parameters
                    (vector of length D, values in [0, 1])
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Blankout and dropout have same maximum likelihood estimator
        if dist not in ('blankout', 'dropout'):
            raise ValueError('Distribution unknown.')

        # Rate parameters: fraction of samples with a positive feature value
        eta = np.mean(X > 0, axis=0)
        zeta = np.mean(Z > 0, axis=0)

        # Ratio of rate parameters. A feature that is never positive in the
        # source domain carries no evidence; its ratio is fixed to 1 so
        # that iota becomes 0 instead of NaN (0/0).
        ratio = np.ones_like(eta)
        np.divide(zeta, eta, out=ratio, where=eta > 0)

        return np.clip(1 - ratio, 0, None)

    def moments_transfer_model(self, X, iota, dist='blankout'):
        """
        Moments of the transfer model.

        INPUT   (1) array 'X': data set (N samples by D features)
                (2) array 'iota': transfer model parameters (D values)
                (3) str 'dist': transfer model, options are 'dropout' and
                    'blankout' (def: 'blankout')
        OUTPUT  (1) array 'E': expected value of transfer model (N samples by
                    D features)
                (2) array 'V': variance of transfer model (D features by D
                    features by N samples); each slice V[:, :, i] is diagonal
        """
        # Data shape
        N, D = X.shape

        # Accept both a flat vector and a (D by 1) column
        iota = np.ravel(iota)

        # First moment of transfer distribution
        if dist == 'dropout':
            E = (1 - iota) * X
        elif dist == 'blankout':
            E = X
        else:
            raise ValueError('Transfer distribution not implemented')

        # Second moment of transfer distribution: diagonal entries
        # iota_d * (1 - iota_d) * x_id^2, written for all samples at once.
        # NOTE(review): both transfer models share this variance formula
        # here; confirm against the reference paper for 'blankout'.
        V = np.zeros((D, D, N))
        diag = np.arange(D)
        V[diag, diag, :] = (iota * (1 - iota) * X**2).T

        return E, V

    def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
        """
        Compute average loss for flda-log.

        INPUT   (1) array 'theta': classifier parameters (D features)
                (2) array 'X': source data set (N samples by D features)
                (3) array 'y': label vector in {-1,+1} (N samples)
                (4) array 'E': expected value with respect to transfer model
                    (N samples by D features)
                (5) array 'V': variance with respect to transfer model
                    (D features by D features by N samples)
                (6) float 'l2': regularization parameter (def: 0.0)
        OUTPUT  (1) float 'L': loss function value
        """
        y = np.ravel(y)
        theta = np.ravel(theta)

        # Labels must be in {-1,+1}
        self._check_binary_labels(y)

        # Precompute terms
        Xt = np.dot(X, theta)
        Et = np.dot(E, theta)

        # Log-partition function log(exp(Xt) + exp(-Xt)), computed without
        # overflowing for large |Xt|
        A = np.logaddexp(Xt, -Xt)

        # First-order partial derivative of log-partition w.r.t. Xt
        dA = np.tanh(Xt)

        # Second-order partial derivative of log-partition w.r.t. Xt
        d2A = 1 - dA**2

        # Quadratic forms theta' * V_i * theta for every sample i
        Q = np.einsum('j,jki,k->i', theta, V, theta)

        # Pointwise loss (second-order Taylor approximation of the expected
        # negative log-likelihood around Xt)
        L = -y * Et + A + dA * (Et - Xt) + 0.5 * d2A * Q

        # Risk (average loss) plus regularization
        return float(np.mean(L) + l2 * np.sum(theta**2))

    def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
        """
        Compute gradient with respect to theta for flda-log.

        The result is the gradient of the value returned by flda_log_loss,
        i.e. the per-sample gradients are averaged, not summed.

        INPUT   (1) array 'theta': classifier parameters (D features)
                (2) array 'X': source data set (N samples by D features)
                (3) array 'y': label vector in {-1,+1} (N samples)
                (4) array 'E': expected value with respect to transfer model
                    (N samples by D features)
                (5) array 'V': variance with respect to transfer model
                    (D features by D features by N samples)
                (6) float 'l2': regularization parameter (def: 0.0)
        OUTPUT  (1) array 'dR': gradient vector (D features)
        """
        # Data shape
        N, D = X.shape

        y = np.ravel(y)
        theta = np.ravel(theta)

        # Labels must be in {-1,+1}
        self._check_binary_labels(y)

        # Precompute common terms
        Xt = np.dot(X, theta)
        Et = np.dot(E, theta)

        # Derivatives of the log-partition function w.r.t. Xt
        dA = np.tanh(Xt)
        d2A = 1 - dA**2

        # V_i * theta for every sample (N by D) and the quadratic forms
        Vt = np.einsum('jki,k->ij', V, theta)
        Q = np.dot(Vt, theta)

        # Per-sample gradient, grouped by the vector it multiplies:
        #   (dA - y) * E_i                      label and first-order terms
        #   d2A * ((Et - Xt) - dA * Q) * X_i    curvature terms
        #   d2A * V_i * theta                   variance term
        dR = (np.dot(E.T, dA - y)
              + np.dot(X.T, d2A * ((Et - Xt) - dA * Q))
              + np.dot(Vt.T, d2A)) / N

        # Add regularization
        return dR + l2 * 2 * theta

    def fit(self, X, y, Z):
        """
        Fit/train a feature-level domain-adaptive classifier.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Map to one-not-encoding (presumably one {-1,+1} column per class;
        # the logistic loss below requires labels in {-1,+1})
        Y, labels = one_hot(y, one_not=True)

        # Number of classes
        K = len(labels)

        # Compute transfer distribution parameters
        iota = self.mle_transfer_dist(X, Z, dist=self.transfer_model)

        # Compute moments of transfer distribution
        E, V = self.moments_transfer_model(X, iota, dist=self.transfer_model)

        # Select loss function
        if self.loss == 'logistic':

            # Preallocate parameter array with random starting points
            theta = np.random.randn(DX, K)

            # Train a one-vs-rest classifier for each class
            for k in range(K):

                # Call scipy's minimizer
                results = minimize(self.flda_log_loss, theta[:, k],
                                   args=(X, Y[:, k], E, V, self.l2),
                                   jac=self.flda_log_grad, method='BFGS',
                                   options={'gtol': self.tolerance,
                                            'maxiter': self.max_iter,
                                            'disp': self.verbose})

                # Store resultant classifier parameters
                theta[:, k] = results.x

        elif self.loss == 'quadratic':

            # Closed-form regularized least-squares solution; solving the
            # linear system avoids forming an explicit inverse
            theta = np.linalg.solve(np.dot(E.T, E) + np.sum(V, axis=2) +
                                    self.l2 * np.eye(DX), np.dot(E.T, Y))

        else:
            raise ValueError('Loss function not implemented.')

        # Store trained classifier parameters
        self.theta = theta

        # Store classes
        self.classes = labels

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        """
        # Refuse to predict with an untrained classifier
        if not self.is_trained:
            raise UserWarning('Classifier is not trained yet.')

        # Data shape
        M, D = Z_.shape

        # Check for same dimensionality as the training data
        if not self.train_data_dim == D:
            raise ValueError('Dimensionality of new data should be equal '
                             'to that of the training data.')

        # Predict target labels: class with the largest linear score
        preds = np.argmax(np.dot(Z_, self.theta), axis=1)

        # Map predictions back to labels
        return self.classes[preds]

    def get_params(self):
        """Get classifier parameters (the values set at construction)."""
        return {'l2': self.l2,
                'loss': self.loss,
                'transfer_model': self.transfer_model,
                'max_iter': self.max_iter,
                'tolerance': self.tolerance,
                'verbose': self.verbose}

    def is_trained(self):
        """
        Check whether classifier is trained.

        NOTE: __init__ assigns a boolean instance attribute with the same
        name, which shadows this method on every instance. Read
        'clf.is_trained' as an attribute; this method is only reachable
        through the class itself.
        """
        return self.is_trained


1		#!/usr/bin/env python
2		# -*- coding: utf-8 -*-
3
4	1	import numpy as np
5	1	import scipy.stats as st
6	1	from scipy.optimize import minimize
7	1	from scipy.sparse.linalg import eigs
8	1	from scipy.spatial.distance import cdist
9	1	import sklearn as sk
10	1	from sklearn.svm import LinearSVC
11	1	from sklearn.linear_model import LogisticRegression, LinearRegression
12	1	from sklearn.model_selection import cross_val_predict
13	1	from os.path import basename
14
15	1	from .util import is_pos_def, one_hot
16
17
18	1	class FeatureLevelDomainAdaptiveClassifier(object):
19		"""
20		Class of feature-level domain-adaptive classifiers.
21
22		Reference: Kouw, Krijthe, Loog & Van der Maaten (2016). Feature-level
23		domain adaptation. JMLR.
24
25		Methods contain training and prediction functions.
26		"""
27
28	1	def __init__(self, l2=0.0, loss='logistic', transfer_model='blankout',
29		max_iter=100, tolerance=1e-5, verbose=True):
30		"""
31		Set classifier instance parameters.
32
33		INPUT (1) float 'l2': l2-regularization parameter value (def:0.01)
34		(2) str 'loss': loss function for classifier, options are
35		'logistic' or 'quadratic' (def: 'logistic')
36		(3) str 'transfer_model': distribution to use for transfer
37		model, options are 'dropout' and 'blankout'
38		(def: 'blankout')
39		(4) int 'max_iter': maximum number of iterations (def: 100)
40		(5) float 'tolerance': convergence criterion threshold on x
41		(def: 1e-5)
42		(7) boolean 'verbose': report training progress (def: True)
43		OUTPUT None
44		"""
45		# Classifier choices
46	1	self.l2 = l2
47	1	self.loss = 'logistic'
48	1	self.transfer_model = transfer_model
49
50		# Optimization parameters
51	1	self.max_iter = max_iter
52	1	self.tolerance = tolerance
53
54		# Whether model has been trained
55	1	self.is_trained = False
56
57		# Dimensionality of training data
58	1	self.train_data_dim = 0
59
60		# Classifier parameters
61	1	self.theta = 0
62
63		# Verbosity
64	1	self.verbose = verbose
65
66	1	def mle_transfer_dist(self, X, Z, dist='blankout'):
67		"""
68		Maximum likelihood estimation of transfer model parameters.
69
70		INPUT (1) array 'X': source data set (N samples by D features)
71		(2) array 'Z': target data set (M samples by D features)
72		(3) str 'dist': distribution of transfer model, options are
73		'blankout' or 'dropout' (def: 'blankout')
74		OUTPUT (1) array 'iota': estimated transfer model parameters
75		(D features by 1)
76		"""
77		# Data shapes
78	1	N, DX = X.shape
79	1	M, DZ = Z.shape
80
81		# Assert equivalent dimensionalities
82	1	if not DX == DZ:
83		raise ValueError('Dimensionalities of X and Z should be equal.')
84
85		# Blankout and dropout have same maximum likelihood estimator
86	1	if (dist == 'blankout') or (dist == 'dropout'):
87
88		# Rate parameters
89	1	eta = np.mean(X > 0, axis=0)
90	1	zeta = np.mean(Z > 0, axis=0)
91
92		# Ratio of rate parameters
93	1	iota = np.clip(1 - zeta / eta, 0, None)
94
95		else:
96		raise ValueError('Distribution unknown.')
97
98	1	return iota
99
100	1	def moments_transfer_model(self, X, iota, dist='blankout'):
101		"""
102		Moments of the transfer model.
103
104		INPUT (1) array 'X': data set (N samples by D features)
105		(2) array 'iota': transfer model parameters (D samples by 1)
106		(3) str 'dist': transfer model, options are 'dropout' and
107		'blankout' (def: 'blankout')
108		OUTPUT (1) array 'E': expected value of transfer model (N samples by
109		D feautures)
110		(2) array 'V': variance of transfer model (D features by D
111		features by N samples)
112		"""
113		# Data shape
114	1	N, D = X.shape
115
116	1	if (dist == 'dropout'):
117
118		# First moment of transfer distribution
119		E = (1-iota) * X
120
121		# Second moment of transfer distribution
122		V = np.zeros((D, D, N))
123		for i in range(N):
124		V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T*X[i, :])
125
126	1	elif (dist == 'blankout'):
127
128		# First moment of transfer distribution
129	1	E = X
130
131		# Second moment of transfer distribution
132	1	V = np.zeros((D, D, N))
133	1	for i in range(N):
134	1	V[:, :, i] = np.diag(iota * (1-iota)) * (X[i, :].T * X[i, :])
135
136		else:
137		raise ValueError('Transfer distribution not implemented')
138
139	1	return E, V
140
141	1	def flda_log_loss(self, theta, X, y, E, V, l2=0.0):
142		"""
143		Compute average loss for flda-log.
144
145		INPUT (1) array 'theta': classifier parameters (D features by 1)
146		(2) array 'X': source data set ()
147		(3) array 'y': label vector (N samples by 1)
148		(4) array 'E': expected value with respect to transfer model
149		(N samples by D features)
150		(5) array 'V': variance with respect to transfer model
151		(D features by D features by N samples)
152		(6) float 'l2': regularization parameter (def: 0.0)
153		OUTPUT (1) float 'L': loss function value
154		"""
155		# Data shape
156	1	N, D = X.shape
157
158		# Assert y in {-1,+1}
159	1	assert np.all(np.sort(np.unique(y)) == (-1, 1))
160
161		# Precompute terms
162	1	Xt = np.dot(X, theta)
163	1	Et = np.dot(E, theta)
164	1	alpha = np.exp(Xt) + np.exp(-Xt)
165	1	beta = np.exp(Xt) - np.exp(-Xt)
166	1	gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
167	1	delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
168
169		# Log-partition function
170	1	A = np.log(alpha)
171
172		# First-order partial derivative of log-partition w.r.t. Xt
173	1	dA = beta / alpha
174
175		# Second-order partial derivative of log-partition w.r.t. Xt
176	1	d2A = 1 - beta**2 / alpha**2
177
178		# Compute pointwise loss (negative log-likelihood)
179	1	L = np.zeros((N, 1))
180	1	for i in range(N):
181	1	L[i] = -y[i] * Et[i] + A[i] + dA[i] * (Et[i] - Xt[i]) + \
182		1./2*d2A[i]*np.dot(np.dot(theta.T, V[:, :, i]), theta)
183
184		# Compute risk (average loss)
185	1	R = np.mean(L, axis=0)
186
187		# Add regularization
188	1	return R + l2*np.sum(theta**2, axis=0)
189
190	1	def flda_log_grad(self, theta, X, y, E, V, l2=0.0):
191		"""
192		Compute gradient with respect to theta for flda-log.
193
194		INPUT (1) array 'theta': classifier parameters (D features by 1)
195		(2) array 'X': source data set ()
196		(3) array 'y': label vector (N samples by 1)
197		(4) array 'E': expected value with respect to transfer model
198		(N samples by D features)
199		(5) array 'V': variance with respect to transfer model
200		(D features by D features by N samples)
201		(6) float 'l2': regularization parameter (def: 0.0)
202		OUTPUT (1) float
203		"""
204		# Data shape
205	1	N, D = X.shape
206
207		# Assert y in {-1,+1}
208	1	assert np.all(np.sort(np.unique(y)) == (-1, 1))
209
210		# Precompute common terms
211	1	Xt = np.dot(X, theta)
212	1	Et = np.dot(E, theta)
213	1	alpha = np.exp(Xt) + np.exp(-Xt)
214	1	beta = np.exp(Xt) - np.exp(-Xt)
215	1	gamma = (np.exp(Xt).T * X.T).T + (np.exp(-Xt).T * X.T).T
216	1	delta = (np.exp(Xt).T * X.T).T - (np.exp(-Xt).T * X.T).T
217
218		# Log-partition function
219	1	A = np.log(alpha)
220
221		# First-order partial derivative of log-partition w.r.t. Xt
222	1	dA = beta / alpha
223
224		# Second-order partial derivative of log-partition w.r.t. Xt
225	1	d2A = 1 - beta**2 / alpha**2
226
227	1	dR = 0
228	1	for i in range(N):
229
230		# Compute gradient terms
231	1	t1 = -y[i]*E[i, :].T
232
233	1	t2 = beta[i] / alpha[i] * X[i, :].T
234
235	1	t3 = (gamma[i, :] / alpha[i] - beta[i]*delta[i, :] /
236		alpha[i]**2).T * (Et[i] - Xt[i])
237
238	1	t4 = beta[i] / alpha[i] * (E[i, :] - X[i, :]).T
239
240	1	t5 = (1 - beta[i]**2 / alpha[i]**2) * np.dot(V[:, :, i], theta)
241
242	1	t6 = -(beta[i] * gamma[i, :] / alpha[i]**2 - beta[i]**2 *
243		delta[i, :] / alpha[i]**3).T * np.dot(np.dot(theta.T,
244		V[:, :, i]), theta)
245
246	1	dR += t1 + t2 + t3 + t4 + t5 + t6
247
248		# Add regularization
249	1	dR += l2*2*theta
250
251	1	return dR
252
253	1	def fit(self, X, y, Z):
254		"""
255		Fit/train a robust bias-aware classifier.
256
257		INPUT (1) array 'X': source data (N samples by D features)
258		(2) array 'y': source labels (N samples by 1)
259		(3) array 'Z': target data (M samples by D features)
260		OUTPUT None
261		"""
262		# Data shapes
263	1	N, DX = X.shape
264	1	M, DZ = Z.shape
265
266		# Assert equivalent dimensionalities
267	1	if not DX == DZ:
268		raise ValueError('Dimensionalities of X and Z should be equal.')
269
270		# Map to one-not-encoding
271	1	Y, labels = one_hot(y, one_not=True)
272
273		# Number of classes
274	1	K = len(labels)
275
276		# Compute transfer distribution parameters
277	1	iota = self.mle_transfer_dist(X, Z)
278
279		# Compute moments of transfer distribution
280	1	E, V = self.moments_transfer_model(X, iota)
281
282		# Select loss function
283	1	if (self.loss == 'logistic'):
284
285		# Preallocate parameter array
286	1	theta = np.random.randn(DX, K)
287
288		# Train a classifier for each class
289	1	for k in range(K):
290
291		# Shorthand for loss computation
292	1	def L(theta): return self.flda_log_loss(theta, X, Y[:, k],
293		E, V, l2=self.l2)
294
295		# Shorthand for gradient computation
296	1	def J(theta): return self.flda_log_grad(theta, X, Y[:, k],
297		E, V, l2=self.l2)
298
299		# Call scipy's minimizer
300	1	results = minimize(L, theta[:, k], jac=J, method='BFGS',
301		options={'gtol': self.tolerance,
302		'disp': self.verbose})
303
304		# Store resultant classifier parameters
305	1	theta[:, k] = results.x
306
307		elif (self.loss == 'quadratic'):
308
309		# Compute closed-form least-squares solution
310		theta = np.inv(E.T*E + np.sum(V, axis=2) + l2*np.eye(D))\
311		* (E.T * Y)
312
313		# Store trained classifier parameters
314	1	self.theta = theta
315
316		# Store classes
317	1	self.classes = labels
318
319		# Mark classifier as trained
320	1	self.is_trained = True
321
322		# Store training data dimensionality
323	1	self.train_data_dim = DX
324
325	1	def predict(self, Z_):
326		"""
327		Make predictions on new dataset.
328
329		INPUT (1) array 'Z_': new data set (M samples by D features)
330		OUTPUT (1) array 'preds': label predictions (M samples by 1)
331		"""
332		# Data shape
333	1	M, D = Z_.shape
334
335		# If classifier is trained, check for same dimensionality
336	1	if self.is_trained:
337	1	assert self.train_data_dim == D
338		else:
339		raise UserWarning('Classifier is not trained yet.')
340
341		# Predict target labels
342	1	preds = np.argmax(np.dot(Z_, self.theta), axis=1)
343
344		# Map predictions back to labels
345	1	preds = self.classes[preds]
346
347		# Return predictions array
348	1	return preds
349
350	1	def get_params(self):
351		"""Get classifier parameters."""
352		return self.clf.get_params()
353
354	1	def is_trained(self):
355		"""Check whether classifier is trained."""
356		return self.is_trained
357

wmkouw / libTLDA

Push — master ( 582254...17fb6a )

FeatureLevelDomainAdaptiveClassifier.predict() B

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like