Completed
Push — master (582254...17fb6a) by Wouter, created at 03:58

TargetContrastivePessimisticClassifier (Rating: B)

Complexity

Total Complexity 52

Size/Duplication

Total Lines 515
Duplicated Lines 0%

Test Coverage

Coverage 77.14%

Importance

Changes 3
Bugs 0
Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 515
ccs 135
cts 175
cp 0.7714
rs 7.9487
wmc 52

14 Methods

Rating   Name   Duplication   Size   Complexity  
B fit() 0 34 3
A __init__() 0 53 3
B predict() 0 27 4
B learning_rate_t() 0 27 5
F tcpr_da() 0 101 9
A remove_intercept() 0 12 1
B project_simplex() 0 30 1
A error_rate() 0 3 1
A add_intercept() 0 22 2
C neg_log_likelihood() 0 56 9
A get_params() 0 10 2
B risk() 0 25 2
A combine_class_covariances() 0 23 3
C discriminant_parameters() 0 72 7

How to fix: Complexity

Complex Class

Complex classes like TargetContrastivePessimisticClassifier often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster. One possible split for this class is sketched below.
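For example, the attributes stored under the "# Optimization parameters" comment in __init__ (max_iter, tolerance, learning_rate, rate_decay) together with the learning_rate_t() method all concern the gradient-descent schedule. A minimal sketch of extracting them into a hypothetical OptimizationSchedule helper class (illustrative only; the class name and its rate_at() method are not part of the library):

import numpy as np

class OptimizationSchedule(object):
    """Hypothetical extracted component: gradient-descent settings."""

    def __init__(self, max_iter=500, tolerance=1e-12,
                 learning_rate=1.0, rate_decay='linear'):
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.learning_rate = learning_rate
        self.rate_decay = rate_decay

    def rate_at(self, t):
        """Learning rate after decay at iteration t (two cases shown)."""
        if self.rate_decay == 'linear':
            return (self.max_iter - t) / (self.learning_rate * self.max_iter)
        elif self.rate_decay == 'exponential':
            return np.exp(-self.learning_rate * t)
        else:
            raise ValueError('Rate decay type unknown.')

The classifier would then hold a single schedule attribute and call schedule.rate_at(t) where it currently calls self.learning_rate_t(t), removing four fields and one method from the class. The full class listing follows.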

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import numpy.linalg as al
from scipy.stats import multivariate_normal as mvn
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import one_hot, regularize_matrix


class TargetContrastivePessimisticClassifier(object):
    """
    Classifiers based on Target Contrastive Pessimistic Risk minimization.

    Methods contain models, risk functions, parameter estimation, etc.
    """

    def __init__(self, loss='lda', l2=1.0, max_iter=500, tolerance=1e-12,
                 learning_rate=1.0, rate_decay='linear', verbosity=0):
        """
        Select a particular type of TCPR classifier.

        INPUT   (1) str 'loss': loss function for TCP Risk, options: 'lda',
                    'linear discriminant analysis', 'qda',
                    'quadratic discriminant analysis' (def: 'lda')
                (2) float 'l2': l2-regularization parameter value (def: 1.0)
                (3) int 'max_iter': maximum number of iterations for
                    optimization (def: 500)
                (4) float 'tolerance': convergence criterion on the TCP
                    parameters (def: 1e-12)
                (5) float 'learning_rate': parameter for size of update of
                    gradient (def: 1.0)
                (6) str 'rate_decay': type of learning rate decay, options:
                    'linear', 'quadratic', 'geometric', 'exponential'
                    (def: 'linear')
                (7) int 'verbosity': verbosity level (def: 0)
        """
        # Classifier options
        self.loss = loss
        self.l2 = l2

        # Optimization parameters
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.learning_rate = learning_rate
        self.rate_decay = rate_decay
        self.verbosity = verbosity

        if self.loss in ['linear discriminant analysis', 'lda']:

            # Set to short name
            self.loss = 'lda'

        elif self.loss in ['quadratic discriminant analysis', 'qda']:

            # Set to short name
            self.loss = 'qda'

        else:
            # Other loss functions are not implemented
            raise ValueError('Model not implemented.')

        # Initialize classifier and classes parameters
        self.parameters = []
        self.classes = []

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = 0

    def add_intercept(self, X):
        """Add 1's to data as last features."""
        # Data shape
        N, D = X.shape

        # Check if there is already an intercept column
        if np.any(np.sum(X, axis=0) == N):

            # Report
            print('Intercept is not the last feature. Swapping..')

            # Find which column contains the intercept
            intercept_index = np.argwhere(np.sum(X, axis=0) == N)

            # Remove existing intercept column and update dimensionality
            X = X[:, np.setdiff1d(np.arange(D), intercept_index)]
            D = X.shape[1]

        # Add intercept as last column
        X = np.hstack((X, np.ones((N, 1))))

        # Append column of 1's to data, and increment dimensionality
        return X, D+1

    def remove_intercept(self, X):
        """Remove 1's from data as last features."""
        # Data shape
        N, D = X.shape

        # Find which column contains the intercept
        intercept_index = np.argwhere(np.sum(X, axis=0) == N)

        # Drop the intercept column(s)
        X = X[:, np.setdiff1d(np.arange(D), intercept_index)]

        # Return data and its updated dimensionality
        return X, X.shape[1]

    def project_simplex(self, v, z=1.0):
        """
        Project vector onto simplex using sorting.

        Reference: "Efficient Projections onto the L1-Ball for Learning in
        High Dimensions" (Duchi, Shalev-Shwartz, Singer, Chandra, 2008).

        INPUT   (1) array 'v': vector to be projected (n dimensions)
                (2) float 'z': constant (def: 1.0)
        OUTPUT  (1) array 'w': projected vector (n dimensions)
        """
        # Number of dimensions
        n = v.shape[0]

        # Sort vector in descending order
        mu = np.sort(v, axis=0)[::-1]

        # Find rho
        C = np.cumsum(mu) - z
        j = np.arange(n) + 1
        rho = j[mu - C/j > 0][-1]

        # Define theta
        theta = C[mu - C/j > 0][-1] / float(rho)

        # Subtract theta from original vector and cap at 0
        w = np.maximum(v - theta, 0)

        # Return projected vector
        return w

    def learning_rate_t(self, t):
        """Compute current learning rate after decay."""
        # Select rate decay
        if self.rate_decay == 'linear':

            # Linear dropoff between t=0 and t=T
            alpha = (self.max_iter - t)/(self.learning_rate*self.max_iter)

        elif self.rate_decay == 'quadratic':

            # Quadratic dropoff between t=0 and t=T
            alpha = ((self.max_iter - t)/(self.learning_rate*self.max_iter))**2

        elif self.rate_decay == 'geometric':

            # Drop rate off inversely to time (t+1 avoids division by zero at t=0)
            alpha = 1 / (self.learning_rate * (t + 1))

        elif self.rate_decay == 'exponential':

            # Exponential dropoff
            alpha = np.exp(-self.learning_rate * t)

        else:
            raise ValueError('Rate decay type unknown.')

        return alpha

    def risk(self, Z, theta, q):
        """
        Compute target contrastive pessimistic risk.

        INPUT   (1) array 'Z': target samples (M samples by D features)
                (2) array 'theta': classifier parameters (D features by
                    K classes)
                (3) array 'q': soft labels (M samples by K classes)
        OUTPUT  (1) float: risk
        """
        # Number of classes
        K = q.shape[1]

        # Compute negative log-likelihood
        L = self.neg_log_likelihood(Z, theta)

        # Weight loss by soft labels
        for k in range(K):
            L[:, k] *= q[:, k]

        # Sum over weighted losses
        L = np.sum(L, axis=1)

        # Risk is average loss
        return np.mean(L, axis=0)

    def neg_log_likelihood(self, X, theta):
        """
        Compute negative log-likelihood under Gaussian distributions.

        INPUT   (1) array 'X': data (N samples by D features)
                (2) tuple 'theta': containing class proportions 'pi', class
                    means 'mu', and class-covariances 'Si'
        OUTPUT  (1) array 'L': loss (N samples by K classes)
        """
        # Unpack parameters
        pi, mu, Si = theta

        # Check if parameter sets match
        assert pi.shape[1] == mu.shape[0]
        assert mu.shape[1] == Si.shape[0]
        assert Si.shape[0] == Si.shape[1]

        # Number of classes
        K = pi.shape[1]

        # Data shape
        N, D = X.shape

        # Preallocate loss array
        L = np.zeros((N, K))

        for k in range(K):

            # Check for linear or quadratic
            if self.loss == 'lda':

                try:
                    # Probability under k-th Gaussian with shared covariance
                    probs = mvn.pdf(X, mu[k, :], Si)

                except al.LinAlgError as err:
                    print('Covariance matrix is singular. Add regularization.')
                    raise err

            elif self.loss == 'qda':

                try:
                    # Probability under k-th Gaussian with own covariance
                    probs = mvn.pdf(X, mu[k, :], Si[:, :, k])

                except al.LinAlgError as err:
                    print('Covariance matrix is singular. Add regularization.')
                    raise err

            else:
                raise ValueError('Loss unknown.')

            # Negative log-likelihood
            L[:, k] = -np.log(pi[0, k]) - np.log(probs)

        return L

    def discriminant_parameters(self, X, Y):
        """
        Estimate parameters of Gaussian distribution for discriminant analysis.

        INPUT   (1) array 'X': data array (N samples by D features)
                (2) array 'Y': label array (N samples by K classes)
        OUTPUT  (1) array 'pi': class proportions (1 by K classes)
                (2) array 'mu': class means (K classes by D features)
                (3) array 'Si': class covariances (D features by D features by
                    K classes)
        """
        # Check labels
        K = Y.shape[1]
        assert K > 1

        # Data shape
        N, D = X.shape

        # Preallocate parameter arrays
        pi = np.zeros((1, K))
        mu = np.zeros((K, D))
        Si = np.zeros((D, D, K))

        # For each class
        for k in range(K):

            # Number of samples for current class
            Nk = np.sum(Y[:, k], axis=0)

            # Check for no samples assigned to certain class
            if Nk == 0:

                # Proportion of samples for current class
                pi[0, k] = 0

                # Mean of current class
                mu[k, :] = np.zeros((1, D))

                # Covariance of current class
                Si[:, :, k] = np.eye(D, D)

            else:

                # Proportion of samples for current class
                pi[0, k] = Nk / N

                # Mean of current class
                mu[k, :] = np.dot(Y[:, k].T, X) / Nk

                # Subtract mean from data
                X_ = X - mu[k, :]

                # Diagonalize current label vector
                dYk = np.diag(Y[:, k])

                # Covariance of current class
                Si[:, :, k] = np.dot(np.dot(X_.T, dYk), X_) / Nk

                # Regularization
                Si[:, :, k] = regularize_matrix(Si[:, :, k], a=self.l2)

        # Check for linear or quadratic discriminant analysis
        if self.loss == 'lda':

            # In LDA, the class-covariance matrices are combined
            Si = self.combine_class_covariances(Si, pi)

        # Check for numerical problems
        if np.any(np.isnan(mu)) or np.any(np.isnan(Si)):
            raise RuntimeError('Parameter estimate is NaN.')

        return pi, mu, Si

    def combine_class_covariances(self, Si, pi):
        """
        Linear combination of class covariance matrices.

        INPUT   (1) array 'Si': Covariance matrix (D features by D features by
                    K classes)
                (2) array 'pi': class proportions (1 by K classes)
        OUTPUT  (1) array 'Si': Combined covariance matrix (D by D)
        """
        # Number of classes
        K = Si.shape[2]

        # Check if pi is size K
        assert pi.shape[1] == K

        # For each class
        for k in range(K):

            # Weight each class-covariance
            Si[:, :, k] = Si[:, :, k] * pi[0, k]

        # Sum over weighted class-covariances
        return np.sum(Si, axis=2)

    def tcpr_da(self, X, y, Z):
        """
        Target Contrastive Pessimistic Risk - discriminant analysis.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  (1) tuple 'theta': estimated classifier parameters, containing
                    class proportions 'pi', class means 'mu', and
                    class-covariances 'Si'
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Remove intercept columns if present
        X, DX = self.remove_intercept(X)
        Z, DZ = self.remove_intercept(Z)

        # Map labels to one-hot-encoding
        Y, classes = one_hot(y)

        # Number of classes
        K = len(classes)

        # Check for at least 2 classes
        assert K > 1

        # Estimate parameters of source model
        theta_ref = self.discriminant_parameters(X, Y)

        # Loss is negative log-likelihood under reference parameters
        L_ref = self.neg_log_likelihood(Z, theta_ref)

        # Initialize target posterior
        q = np.ones((M, K)) / K

        print('Starting TCP optimization')

        TCPRt = np.inf
        for t in range(self.max_iter):

            # Maximization phase

            # Estimate parameters using TCP risk
            theta_tcp = self.discriminant_parameters(Z, q)

            # Minimization phase

            # Compute loss under new parameters
            L_tcp = self.neg_log_likelihood(Z, theta_tcp)

            # Gradient is difference in losses
            Dq = L_tcp - L_ref

            # Update learning rate
            alpha = self.learning_rate_t(t)

            # Steepest descent step
            q -= alpha*Dq

            # Project back onto simplex
            for m in range(M):
                q[m, :] = self.project_simplex(q[m, :])

            # Monitor progress

            # Risks of current parameters
            R_tcp = self.risk(Z, theta_tcp, q)
            R_ref = self.risk(Z, theta_ref, q)

            # Assert no numerical problems
            if np.isnan(R_tcp) or np.isnan(R_ref):
                raise RuntimeError('Objective is NaN.')

            # Current TCP risk
            TCPRt_ = R_tcp - R_ref

            # Change in risk difference for this iteration
            dR = al.norm(TCPRt - TCPRt_)

            # Check for change smaller than tolerance
            if (dR < self.tolerance):
                print('Broke at iteration '+str(t)+', TCP Risk = '+str(TCPRt_))
                break

            # Report progress
            if (t % 100) == 1:
                print('Iteration ' + str(t) + '/' + str(self.max_iter) +
                      ', TCP Risk = ' + str(TCPRt_))

            # Update
            TCPRt = TCPRt_

        # Return estimated parameters
        return theta_tcp

    def fit(self, X, y, Z):
        """
        Fit/train a TCPR classifier.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        if self.loss in ['lda', 'qda']:

            # Discriminant analysis model for TCPR
            self.parameters = self.tcpr_da(X, y, Z)

        else:
            # Other loss functions are not implemented
            raise ValueError('Loss function unknown.')

        # Extract and store classes
        self.classes = np.unique(y)

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        """
        # Data shape
        M, D = Z_.shape

        # If classifier is trained, check for same dimensionality
        if self.is_trained:
            assert D == self.train_data_dim

        if self.loss in ['lda', 'qda']:

            # Compute negative log-likelihood under each distribution
            L = self.neg_log_likelihood(Z_, self.parameters)

            # Take class with smallest loss (i.e. largest probability)
            preds = np.argmin(L, axis=1)

        else:
            # Other loss functions are not implemented
            raise ValueError('Loss function unknown.')

        # Map predictions back to original labels
        preds = self.classes[preds]

        # Return predictions array
        return preds

    def get_params(self):
        """Return classifier parameters."""
        # Check if classifier is trained
        if self.is_trained:
            return self.parameters

        else:
            # Throw soft error
            print('Classifier is not trained yet.')
            return []

    def error_rate(self, preds, u_):
        """Compute classification error rate."""
        return np.mean(preds != u_, axis=0)
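For reference, a minimal usage sketch of the class on synthetic data (array shapes follow the fit() and predict() docstrings above; the data values and parameter settings are illustrative only and assume the class and its .util helpers are importable):

import numpy as np

# Synthetic source data (N x D), source labels (N,), unlabeled target data (M x D)
X = np.vstack((np.random.randn(50, 2) - 1, np.random.randn(50, 2) + 1))
y = np.concatenate((np.zeros(50), np.ones(50)))
Z = np.vstack((np.random.randn(40, 2) - 0.5, np.random.randn(40, 2) + 1.5))

# Train on labeled source samples plus unlabeled target samples
clf = TargetContrastivePessimisticClassifier(loss='lda', l2=0.1, max_iter=200)
clf.fit(X, y, Z)

# Predict labels for the target samples
preds = clf.predict(Z)

# If true target labels 'u_true' were available, the error rate could be
# computed as: err = clf.error_rate(preds, u_true)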