Completed
Push — master (f3d068...88fa67) by Wouter, created 03:44

combine_class_covariances()   grade: B

Complexity:    Conditions 3
Size:          Total Lines 33
Duplication:   Lines 0, Ratio 0 %
Code Coverage: Tests 6, CRAP Score 3.0261
Importance:    Changes 1, Bugs 0, Features 0
Metric   Value
cc       3
c        1
b        0
f        0
dl       0
loc      33
ccs      6
cts      7
cp       0.8571
crap     3.0261
rs       8.8571
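The CRAP figure is consistent with the commonly used formula crap = cc^2 * (1 - coverage)^3 + cc; a quick check against the numbers above (assuming that formula, which the report does not state explicitly):

# Sanity check of the reported CRAP score, assuming the standard formula
# crap = cc^2 * (1 - coverage)^3 + cc.
cc = 3           # cyclomatic complexity ('cc')
cp = 6.0 / 7.0   # coverage proportion ('cp' = ccs / cts)
print(cc ** 2 * (1 - cp) ** 3 + cc)   # ~3.026, close to the reported 3.0261
                                      # (small differences presumably from
                                      # rounding inside the tool)

The full module under review follows.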
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import numpy.linalg as al
from scipy.stats import multivariate_normal as mvn
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import one_hot, regularize_matrix


class TargetContrastivePessimisticClassifier(object):
    """
    Classifiers based on Target Contrastive Pessimistic Risk minimization.

    Methods contain models, risk functions, parameter estimation, etc.
    """

    def __init__(self, loss='lda', l2=1.0, max_iter=500, tolerance=1e-12,
                 learning_rate=1.0, rate_decay='linear', verbosity=0):
        """
        Select a particular type of TCPR classifier.

        Parameters
        ----------
        loss : str
            loss function for TCP Risk, options: 'lda',
            'linear discriminant analysis', 'qda',
            'quadratic discriminant analysis' (def: 'lda')
        l2 : float
            l2-regularization parameter value (def: 1.0)
        max_iter : int
            maximum number of iterations for optimization (def: 500)
        tolerance : float
            convergence criterion on the TCP parameters (def: 1e-12)
        learning_rate : float
            parameter for size of update of gradient (def: 1.0)
        rate_decay : str
            type of learning rate decay, options: 'linear', 'quadratic',
            'geometric', 'exponential' (def: 'linear')
        verbosity : int
            amount of progress reporting (def: 0)

        Returns
        -------
        None

        """
        # Classifier options
        self.loss = loss
        self.l2 = l2

        # Optimization parameters
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.learning_rate = learning_rate
        self.rate_decay = rate_decay
        self.verbosity = verbosity

        if self.loss in ['linear discriminant analysis', 'lda']:

            # Set to short name
            self.loss = 'lda'

        elif self.loss in ['quadratic discriminant analysis', 'qda']:

            # Set to short name
            self.loss = 'qda'

        else:
            # Other loss functions are not implemented
            raise ValueError('Model not implemented.')

        # Initialize classifier and classes parameters
        self.parameters = []
        self.classes = []

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = 0

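For orientation, constructing the classifier looks like this (a sketch; the import assumes the class is exposed by an installed libtlda package, which the relative imports above suggest but the listing does not confirm):

# Construct a TCPR classifier; the import path is an assumption about the
# packaging of this module.
from libtlda import TargetContrastivePessimisticClassifier

clf = TargetContrastivePessimisticClassifier(loss='qda', l2=0.1,
                                             rate_decay='exponential')
print(clf.loss, clf.is_trained)   # qda False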
    def add_intercept(self, X):
        """Add a column of 1's to the data as the last feature."""
        # Data shape
        N, D = X.shape

        # Check if there's already an intercept column
        if np.any(np.sum(X, axis=0) == N):

            # Report
            print('Intercept is not the last feature. Swapping..')

            # Find which column contains the intercept
            intercept_index = np.argwhere(np.sum(X, axis=0) == N)

            # Remove existing intercept column; it is re-added as the last
            # column below
            X = X[:, np.setdiff1d(np.arange(D), intercept_index)]

        # Add intercept as last column
        X = np.hstack((X, np.ones((N, 1))))

        # Return augmented data and its actual new dimensionality
        return X, X.shape[1]

    def remove_intercept(self, X):
        """Remove intercept column of 1's from the data."""
        # Data shape
        N, D = X.shape

        # Find which column contains the intercept
        intercept_index = np.argwhere(np.sum(X, axis=0) == N)

        # Remove the intercept column
        X = X[:, np.setdiff1d(np.arange(D), intercept_index)]

        # Return reduced data and its actual dimensionality
        return X, X.shape[1]

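A minimal standalone sketch of what the intercept helper does (re-implemented here so the snippet runs without the package):

# Append a column of 1's and report the new dimensionality.
import numpy as np

def add_intercept(X):
    N, D = X.shape
    X = np.hstack((X, np.ones((N, 1))))
    return X, X.shape[1]

Xb, D = add_intercept(np.random.randn(4, 2))
print(Xb.shape, D)   # (4, 3) 3, with the last column all ones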
    def project_simplex(self, v, z=1.0):
        """
        Project vector onto simplex using sorting.

        Reference: "Efficient Projections onto the L1-Ball for Learning in
        High Dimensions" (Duchi, Shalev-Shwartz, Singer & Chandra, 2008).

        Parameters
        ----------
        v : array
            vector to be projected (n dimensions)
        z : float
            constant (def: 1.0)

        Returns
        -------
        w : array
            projected vector (n dimensions)

        """
        # Number of dimensions
        n = v.shape[0]

        # Sort vector
        mu = np.sort(v, axis=0)[::-1]

        # Find rho
        C = np.cumsum(mu) - z
        j = np.arange(n) + 1
        rho = j[mu - C/j > 0][-1]

        # Define theta
        theta = C[mu - C/j > 0][-1] / float(rho)

        # Subtract theta from original vector and cap at 0
        w = np.maximum(v - theta, 0)

        # Return projected vector
        return w

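The projection can be checked in isolation; a standalone sketch with a small vector, using the same sorting-based construction as the method above:

# Standalone sketch of the simplex projection.
import numpy as np

def project_simplex(v, z=1.0):
    mu = np.sort(v)[::-1]                       # sort in descending order
    C = np.cumsum(mu) - z
    j = np.arange(v.shape[0]) + 1
    rho = j[mu - C / j > 0][-1]
    theta = C[mu - C / j > 0][-1] / float(rho)
    return np.maximum(v - theta, 0)

w = project_simplex(np.array([0.4, 1.2, -0.3]))
print(w, w.sum())   # [0.1 0.9 0. ] 1.0 -- non-negative and sums to z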
    def learning_rate_t(self, t):
        """
        Compute current learning rate after decay.

        Parameters
        ----------
        t : int
            current iteration

        Returns
        -------
        alpha : float
            current learning rate

        """
        # Select rate decay
        if self.rate_decay == 'linear':

            # Linear dropoff between t=0 and t=T
            alpha = (self.max_iter - t)/(self.learning_rate*self.max_iter)

        elif self.rate_decay == 'quadratic':

            # Quadratic dropoff between t=0 and t=T
            alpha = ((self.max_iter - t)/(self.learning_rate*self.max_iter))**2

        elif self.rate_decay == 'geometric':

            # Drop rate off inversely to time; t+1 avoids division by zero
            # at the first iteration (t=0)
            alpha = 1 / (self.learning_rate * (t + 1))

        elif self.rate_decay == 'exponential':

            # Exponential dropoff
            alpha = np.exp(-self.learning_rate * t)

        else:
            raise ValueError('Rate decay type unknown.')

        return alpha

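To see how the four schedules differ, a quick standalone comparison (toy settings; only the shapes of the curves matter):

# Compare the four decay schedules for max_iter=500 and learning_rate=1.0.
import numpy as np

T, rate = 500, 1.0
for t in [0, 1, 100, 499]:
    linear = (T - t) / (rate * T)
    quadratic = linear ** 2
    geometric = 1 / (rate * (t + 1))   # t+1 mirrors the guard added above
    exponential = np.exp(-rate * t)
    print(t, round(linear, 3), round(quadratic, 3),
          round(geometric, 3), round(exponential, 3))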
    def risk(self, Z, theta, q):
        """
        Compute target contrastive pessimistic risk.

        Parameters
        ----------
        Z : array
            target samples (M samples by D features)
        theta : tuple(array, array, array)
            classifier parameters: class proportions 'pi', class means 'mu'
            and class covariances 'Si'
        q : array
            soft labels (M samples by K classes)

        Returns
        -------
        float
            Value of risk function.

        """
        # Number of classes
        K = q.shape[1]

        # Compute negative log-likelihood
        L = self.neg_log_likelihood(Z, theta)

        # Weight loss by soft labels
        for k in range(K):
            L[:, k] *= q[:, k]

        # Sum over weighted losses
        L = np.sum(L, axis=1)

        # Risk is average loss
        return np.mean(L, axis=0)

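The risk is simply the soft-label-weighted average loss; with toy numbers:

# Toy computation of the risk: per-class losses L weighted by soft labels q,
# summed over classes and averaged over samples.
import numpy as np

L = np.array([[0.5, 2.0],
              [1.5, 0.2]])   # losses (2 samples by 2 classes)
q = np.array([[0.8, 0.2],
              [0.1, 0.9]])   # soft labels on the simplex

print(np.mean(np.sum(L * q, axis=1)))   # 0.565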
    def neg_log_likelihood(self, X, theta):
        """
        Compute negative log-likelihood under Gaussian distributions.

        Parameters
        ----------
        X : array
            data (N samples by D features)
        theta : tuple(array, array, array)
            tuple containing class proportions 'pi', class means 'mu',
            and class-covariances 'Si'

        Returns
        -------
        L : array
            loss (N samples by K classes)

        """
        # Unpack parameters
        pi, mu, Si = theta

        # Check if parameter sets match
        if not pi.shape[1] == mu.shape[0]:
            raise ValueError('Number of proportions does not match number '
                             'of means.')

        if not mu.shape[1] == Si.shape[0] == Si.shape[1]:
            raise ValueError('''Dimensions of mean do not match dimensions of
                              covariance matrix.''')

        # Number of classes
        K = pi.shape[1]

        # Data shape
        N, D = X.shape

        # Preallocate loss array
        L = np.zeros((N, K))

        for k in range(K):

            # Check for linear or quadratic
            if self.loss == 'lda':

                try:
                    # Probability under k-th Gaussian with shared covariance
                    probs = mvn.pdf(X, mu[k, :], Si)

                except al.LinAlgError as err:
                    print('Covariance matrix is singular. Add regularization.')
                    raise err

            elif self.loss == 'qda':

                try:
                    # Probability under k-th Gaussian with own covariance
                    probs = mvn.pdf(X, mu[k, :], Si[:, :, k])

                except al.LinAlgError as err:
                    print('Covariance matrix is singular. Add regularization.')
                    raise err

            else:
                raise ValueError('Loss unknown.')

            # Negative log-likelihood
            L[:, k] = -np.log(pi[0, k]) - np.log(probs)

        return L

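A standalone sketch of the LDA branch: per-class negative log-likelihood under Gaussians with a shared covariance, using toy parameters:

# Per-class negative log-likelihood under a shared-covariance (LDA) model.
import numpy as np
from scipy.stats import multivariate_normal as mvn

X = np.random.randn(5, 2)
pi = np.array([[0.5, 0.5]])                  # class proportions
mu = np.array([[-1.0, 0.0], [1.0, 0.0]])     # class means
Si = np.eye(2)                               # shared covariance

L = np.zeros((5, 2))
for k in range(2):
    L[:, k] = -np.log(pi[0, k]) - np.log(mvn.pdf(X, mu[k, :], Si))
print(L.shape)   # (5, 2): one loss per sample per class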
    def discriminant_parameters(self, X, Y):
        """
        Estimate parameters of Gaussian distribution for discriminant analysis.

        Parameters
        ----------
        X : array
            data array (N samples by D features)
        Y : array
            label array (N samples by K classes)

        Returns
        -------
        pi : array
            class proportions (1 by K classes)
        mu : array
            class means (K classes by D features)
        Si : array
            class covariances (D features by D features by K classes)

        """
        # Number of classes
        K = Y.shape[1]

        # Check for sufficient labels
        if not K > 1:
            raise ValueError('Number of classes should be larger than 1.')

        # Data shape
        N, D = X.shape

        # Preallocate parameter arrays
        pi = np.zeros((1, K))
        mu = np.zeros((K, D))
        Si = np.zeros((D, D, K))

        # For each class
        for k in range(K):

            # Number of samples for current class
            Nk = np.sum(Y[:, k], axis=0)

            # Check for no samples assigned to certain class
            if Nk == 0:

                # Proportion of samples for current class
                pi[0, k] = 0

                # Mean of current class
                mu[k, :] = np.zeros((1, D))

                # Covariance of current class
                Si[:, :, k] = np.eye(D, D)

            else:

                # Proportion of samples for current class
                pi[0, k] = Nk / N

                # Mean of current class
                mu[k, :] = np.dot(Y[:, k].T, X) / Nk

                # Subtract mean from data
                X_ = X - mu[k, :]

                # Diagonalize current label vector
                dYk = np.diag(Y[:, k])

                # Covariance of current class
                Si[:, :, k] = np.dot(np.dot(X_.T, dYk), X_) / Nk

                # Regularization
                Si[:, :, k] = regularize_matrix(Si[:, :, k], a=self.l2)

        # Check for linear or quadratic discriminant analysis
        if self.loss == 'lda':

            # In LDA, the class-covariance matrices are combined
            Si = self.combine_class_covariances(Si, pi)

        # Check for numerical problems
        if np.any(np.isnan(mu)) or np.any(np.isnan(Si)):
            raise RuntimeError('Parameter estimate is NaN.')

        return pi, mu, Si

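These estimates are ordinary weighted moments; a standalone sketch on toy one-hot labels (a plain ridge term stands in for the package's regularize_matrix helper, whose exact behaviour is not shown in this listing):

# Weighted moment estimates for two Gaussian classes; the diagonal ridge
# term is an assumed stand-in for regularize_matrix.
import numpy as np

X = np.vstack((np.random.randn(20, 2) - 1, np.random.randn(20, 2) + 1))
Y = np.zeros((40, 2))
Y[:20, 0], Y[20:, 1] = 1, 1                        # one-hot labels

N, D = X.shape
K = Y.shape[1]
pi = np.sum(Y, axis=0, keepdims=True) / N
mu = np.dot(Y.T, X) / np.sum(Y, axis=0)[:, None]
Si = np.zeros((D, D, K))
for k in range(K):
    X_ = X - mu[k, :]
    Si[:, :, k] = X_.T @ np.diag(Y[:, k]) @ X_ / np.sum(Y[:, k])
    Si[:, :, k] += 1.0 * np.eye(D)                 # stand-in regularizer
print(pi.shape, mu.shape, Si.shape)                # (1, 2) (2, 2) (2, 2, 2)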
    def combine_class_covariances(self, Si, pi):
        """
        Linear combination of class covariance matrices.

        Parameters
        ----------
        Si : array
            class covariance matrices (D features by D features by K classes)
        pi : array
            class proportions (1 by K classes)

        Returns
        -------
        Si : array
            Combined covariance matrix (D by D)

        """
        # Number of classes
        K = Si.shape[2]

        # Check whether pi has an entry for each class
        if not pi.shape[1] == K:
            raise ValueError('''Number of proportions does not match number of
                             classes of covariance matrix.''')

        # For each class
        for k in range(K):

            # Weight each class-covariance
            Si[:, :, k] = Si[:, :, k] * pi[0, k]

        # Sum over weighted class-covariances
        return np.sum(Si, axis=2)

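The pooled LDA covariance is just the prior-weighted sum of the class covariances; with toy matrices (note that the method above scales Si in place, so callers should not reuse the input array afterwards):

# Prior-weighted pooling of class covariances, as in LDA.
import numpy as np

Si = np.stack((np.eye(2), 3 * np.eye(2)), axis=2)   # (D, D, K) = (2, 2, 2)
pi = np.array([[0.75, 0.25]])

pooled = np.sum(Si * pi[0][None, None, :], axis=2)
print(pooled)   # 0.75*I + 0.25*(3I) = 1.5*I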
    def tcpr_da(self, X, y, Z):
        """
        Target Contrastive Pessimistic Risk - discriminant analysis.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        y : array
            source labels (N samples by 1)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        theta : tuple(array, array, array)
            estimated classifier parameters: class proportions 'pi', class
            means 'mu' and class covariances 'Si'

        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Remove intercept columns, if present
        X, DX = self.remove_intercept(X)
        Z, DZ = self.remove_intercept(Z)

        # Map labels to one-hot-encoding
        Y, classes = one_hot(y)

        # Number of classes
        K = len(classes)

        # Check for at least 2 classes
        if not K > 1:
            raise ValueError('Number of classes should be larger than 1.')

        # Estimate parameters of source model
        theta_ref = self.discriminant_parameters(X, Y)

        # Loss is negative log-likelihood under reference parameters
        L_ref = self.neg_log_likelihood(Z, theta_ref)

        # Initialize target posterior
        q = np.ones((M, K)) / K

        print('Starting TCP optimization')

        TCPRt = np.inf
        for t in range(self.max_iter):

            # Maximization phase

            # Estimate parameters using TCP risk
            theta_tcp = self.discriminant_parameters(Z, q)

            # Minimization phase

            # Compute loss under new parameters
            L_tcp = self.neg_log_likelihood(Z, theta_tcp)

            # Gradient is difference in losses
            Dq = L_tcp - L_ref

            # Update learning rate
            alpha = self.learning_rate_t(t)

            # Steepest descent step
            q -= alpha*Dq

            # Project back onto simplex
            for m in range(M):
                q[m, :] = self.project_simplex(q[m, :])

            # Monitor progress

            # Risks of current parameters
            R_tcp = self.risk(Z, theta_tcp, q)
            R_ref = self.risk(Z, theta_ref, q)

            # Assert no numerical problems
            if np.isnan(R_tcp) or np.isnan(R_ref):
                raise RuntimeError('Objective is NaN.')

            # Current TCP risk
            TCPRt_ = R_tcp - R_ref

            # Change in risk difference for this iteration
            dR = al.norm(TCPRt - TCPRt_)

            # Check for change smaller than tolerance
            if (dR < self.tolerance):
                print('Broke at iteration '+str(t)+', TCP Risk = '+str(TCPRt_))
                break

            # Report progress
            if (t % 100) == 1:
                print('Iteration ' + str(t) + '/' + str(self.max_iter) +
                      ', TCP Risk = ' + str(TCPRt_))

            # Update
            TCPRt = TCPRt_

        # Return estimated parameters
        return theta_tcp

    def fit(self, X, y, Z):
        """
        Fit/train a target contrastive pessimistic classifier.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        y : array
            source labels (N samples by 1)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        None

        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        if self.loss in ['lda', 'qda']:

            # Discriminant analysis model for TCPR
            self.parameters = self.tcpr_da(X, y, Z)

        else:
            # Other loss functions are not implemented
            raise ValueError('Loss function unknown.')

        # Extract and store classes
        self.classes = np.unique(y)

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_):
        """
        Make predictions on new dataset.

        Parameters
        ----------
        Z_ : array
            new data set (M samples by D features)

        Returns
        -------
        preds : array
            label predictions (M samples by 1)

        """
        # Data shape
        M, D = Z_.shape

        # If classifier is trained, check for same dimensionality
        if self.is_trained:
            if not self.train_data_dim == D:
                raise ValueError('''Test data is of different dimensionality
                                 than training data.''')

        if self.loss in ['lda', 'qda']:

            # Compute negative log-likelihood under each class distribution
            losses = self.neg_log_likelihood(Z_, self.parameters)

            # Lowest negative log-likelihood corresponds to the most
            # probable class
            preds = np.argmin(losses, axis=1)

        else:
            # Other loss functions are not implemented
            raise ValueError('Loss function unknown.')

        # Map predictions back to original labels
        preds = self.classes[preds]

        # Return predictions array
        return preds

    def get_params(self):
        """Return classifier parameters."""
        # Check if classifier is trained
        if self.is_trained:
            return self.parameters

        else:
            # Report error softly instead of raising
            print('Classifier is not trained yet.')
            return []

    def error_rate(self, preds, u_):
        """Compute classification error rate."""
        return np.mean(preds != u_, axis=0)
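Putting it together, a usage sketch on synthetic source and target samples (the import path and the installed libtlda package are assumptions about how this module is packaged; the data and the shift are made up for illustration):

# End-to-end sketch: train on labeled source data, adapt to unlabeled target
# data, then evaluate against target labels if any are available.
import numpy as np
from libtlda import TargetContrastivePessimisticClassifier

rng = np.random.RandomState(0)
X = np.vstack((rng.randn(50, 2) - 1, rng.randn(50, 2) + 1))       # source
y = np.repeat([0, 1], 50)                                         # labels
Z = np.vstack((rng.randn(50, 2) - 0.5, rng.randn(50, 2) + 1.5))   # target

clf = TargetContrastivePessimisticClassifier(loss='lda', max_iter=100)
clf.fit(X, y, Z)
preds = clf.predict(Z)

u = np.repeat([0, 1], 50)              # target labels, if available
print(clf.error_rate(preds, u))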