SubspaceAlignedClassifier.subspace_alignment() - Code Metrics - Inspection of "More work on unit tests and docstrings." - wmkouw/libTLDA - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( fe60a9...f3d068 )

by Wouter

created 2018-06-15 19:16 UTC

SubspaceAlignedClassifier.subspace_alignment() A

↳ Parent: SubspaceAlignedClassifier

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	8
CRAP Score	2.0054

Importance

Changes

Metric	Value
cc	2
dl	0
loc	47
ccs	8
cts	9
cp	0.8889
crap	2.0054
rs	9.0303
c	0
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import scipy.stats as st
from scipy.sparse.linalg import eigs
from scipy.spatial.distance import cdist
import sklearn as sk
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import is_pos_def


class SubspaceAlignedClassifier(object):
    """
    Class of classifiers based on Subspace Alignment.

    The source and target data sets are each summarised by their leading
    principal components. The source components are aligned to the target
    components, and a standard linear classifier is trained on the aligned
    source data so that it can be applied to target data projected onto the
    target components.

    Methods contain the alignment itself, classifiers and general utilities.
    """

    # Loss names accepted by the constructor and by fit().
    _SUPPORTED_LOSSES = ('logistic', 'quadratic', 'hinge')

    def __init__(self, loss='logistic', l2=1.0, num_components=1):
        """
        Select a particular type of subspace aligned classifier.

        Arguments
        ---------
        loss : str
            loss function for the classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0).
            NOTE(review): the value is stored on the instance but is not
            passed on to the underlying scikit-learn estimators, which are
            built with their default settings - confirm whether intended.
        num_components : int
            number of principal components to maintain (def: 1)

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            if `loss` is not one of the supported options.

        Examples
        --------
        clf = SubspaceAlignedClassifier(loss='hinge', l2=0.1)

        """
        self.loss = loss
        self.l2 = l2
        self.num_components = num_components

        # Initialize untrained classifiers
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Whether model has been trained. Kept in a private attribute and
        # exposed through the `is_trained` property, so that the public name
        # is not simultaneously a method and an instance attribute.
        self._is_trained = False

        # Dimensionality of training data (empty string until fit is called)
        self.train_data_dim = ''

    @property
    def is_trained(self):
        """Check whether classifier is trained (True after a call to fit)."""
        return self._is_trained

    @is_trained.setter
    def is_trained(self, value):
        """Allow `clf.is_trained = flag` assignments, as before."""
        self._is_trained = bool(value)

    def subspace_alignment(self, X, Z, num_components=1):
        """
        Compute subspace and alignment matrix.

        Arguments
        ---------
        X : array
            source data set (N samples by D features)
        Z : array
            target data set (M samples by D features)
        num_components : int
            number of components (def: 1)

        Returns
        -------
        V : array
            alignment matrix mapping source component coordinates to target
            component coordinates (num_components by num_components)
        CX : array
            source principal component coefficients
            (D features by num_components)
        CZ : array
            target principal component coefficients
            (D features by num_components)

        Raises
        ------
        ValueError
            if X and Z do not have the same number of features.

        Examples
        --------
        X = np.random.randn(100, 10)
        Z = np.random.randn(100, 10)*2 + 1
        clf = SubspaceAlignedClassifier()
        V, CX, CZ = clf.subspace_alignment(X, Z, num_components=2)

        """
        # Data shapes (tuple unpacking also enforces 2-dimensional input)
        _, DX = X.shape
        _, DZ = Z.shape

        # Assert equivalent dimensionalities
        if DX != DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Compute principal components; transposed so that columns are
        # components.
        # NOTE(review): `whiten=True` presumably only influences
        # PCA.transform and not `components_` - confirm against scikit-learn.
        CX = PCA(n_components=num_components, whiten=True).fit(X).components_.T
        CZ = PCA(n_components=num_components, whiten=True).fit(Z).components_.T

        # Alignment between source and target components
        V = np.dot(CX.T, CZ)

        # Return alignment matrix and principal component coefficients
        return V, CX, CZ

    def fit(self, X, y, Z):
        """
        Fit/train a classifier on source data aligned to the target subspace.

        Arguments
        ---------
        X : array
            source data (N samples by D features)
        y : array
            source labels (N samples)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if X and Z do not have the same number of features.
        NotImplementedError
            if `self.loss` is not one of the supported options.

        Examples
        --------
        X = np.random.randn(10, 2)
        y = np.hstack((-np.ones((5,)), np.ones((5,))))
        Z = np.random.randn(10, 2)
        clf = SubspaceAlignedClassifier()
        clf.fit(X, y, Z)

        """
        # Data shapes (tuple unpacking also enforces 2-dimensional input)
        _, DX = X.shape
        _, DZ = Z.shape

        # Assert equivalent dimensionalities
        if DX != DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Subspace alignment
        V, CX, CZ = self.subspace_alignment(X, Z,
                                            num_components=self.num_components)

        # Store target subspace; predict() projects new data onto it
        self.target_subspace = CZ

        # Map source data onto source principal components
        X = np.dot(X, CX)

        # Align source data to target subspace
        X = np.dot(X, V)

        # `loss` is a public attribute and may have been changed after
        # construction, so it is re-validated before training.
        if self.loss not in self._SUPPORTED_LOSSES:
            # Other loss functions are not implemented
            raise NotImplementedError

        # Every supported loss trains its scikit-learn estimator identically
        self.clf.fit(X, y)

        # Mark classifier as trained
        self._is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_, whiten=False):
        """
        Make predictions on new dataset.

        Arguments
        ---------
        Z_ : array
            new data set (M samples by D features)
        whiten : boolean
            whether to z-score new data beforehand (def: false)

        Returns
        -------
        preds : array
            label predictions (M samples)

        Raises
        ------
        AssertionError
            if the classifier is trained and Z_ has a different number of
            features than the training data.

        Examples
        --------
        X = np.random.randn(10, 2)
        y = np.hstack((-np.ones((5,)), np.ones((5,))))
        Z = np.random.randn(10, 2)
        clf = SubspaceAlignedClassifier()
        clf.fit(X, y, Z)
        preds = clf.predict(Z)

        """
        # Data shape
        _, D = Z_.shape

        # If classifier is trained, check for same dimensionality. Raised
        # explicitly (rather than via an `assert` statement) so the check is
        # not stripped when Python runs with -O; the exception type is kept
        # for callers that already catch AssertionError.
        if self._is_trained and self.train_data_dim != D:
            raise AssertionError('Dimensionality of new data should equal '
                                 'dimensionality of training data.')

        # Check for need to whiten data beforehand
        if whiten:
            Z_ = st.zscore(Z_)

        # Map new target data onto target subspace
        Z_ = np.dot(Z_, self.target_subspace)

        # Call scikit's predict function
        preds = self.clf.predict(Z_)

        # For quadratic loss function, turn the real-valued regression
        # outputs into 0 (negative), 0.5 (exactly zero) or 1 (positive).
        # NOTE(review): this yields {0, 1} labels even when training labels
        # were {-1, +1} - confirm the intended label encoding.
        if self.loss == 'quadratic':
            preds = (np.sign(preds)+1)/2.

        # Return predictions array
        return preds

    def get_params(self):
        """Get parameters of the underlying scikit-learn estimator."""
        return self.clf.get_params()


1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3
4	1		import numpy as np
5	1		import scipy.stats as st
6	1		from scipy.sparse.linalg import eigs
7	1		from scipy.spatial.distance import cdist
8	1		import sklearn as sk
9	1		from sklearn.decomposition import PCA
10	1		from sklearn.svm import LinearSVC
11	1		from sklearn.linear_model import LogisticRegression, LinearRegression
12	1		from sklearn.model_selection import cross_val_predict
13	1		from os.path import basename
14
15	1		from .util import is_pos_def
16
17
18	1		class SubspaceAlignedClassifier(object):
19			"""
20			Class of classifiers based on Subspace Alignment.
21
22			Methods contain the alignment itself, classifiers and general utilities.
23			"""
24
25	1	View Code Duplication	def __init__(self, loss='logistic', l2=1.0, num_components=1):
			0 ignored issues – show Duplication introduced 2018-06-14 07:43 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
26			"""
27			Select a particular type of subspace aligned classifier.
28
29			Arguments
30			---------
31			loss : str
32			loss function for weighted classifier, options: 'logistic',
33			'quadratic', 'hinge' (def: 'logistic')
34			l2 : float
35			l2-regularization parameter value (def:0.01)
36			num_components : int
37			number of transfer components to maintain (def: 1)
38
39			Returns
40			-------
41			None
42
43			Examples
44			--------
45			clf = SubspaceAlignedClassifier(loss='hinge', l2=0.1)
46
47			"""
48	1		self.loss = loss
49	1		self.l2 = l2
50	1		self.num_components = num_components
51
52			# Initialize untrained classifiers
53	1		if self.loss == 'logistic':
54			# Logistic regression model
55	1		self.clf = LogisticRegression()
56			elif self.loss == 'quadratic':
57			# Least-squares model
58			self.clf = LinearRegression()
59			elif self.loss == 'hinge':
60			# Linear support vector machine
61			self.clf = LinearSVC()
62			else:
63			# Other loss functions are not implemented
64			raise NotImplementedError('Loss function not implemented.')
65
66			# Whether model has been trained
67	1		self.is_trained = False
68
69			# Dimensionality of training data
70	1		self.train_data_dim = ''
71
72	1		def subspace_alignment(self, X, Z, num_components=1):
73			"""
74			Compute subspace and alignment matrix.
75
76			Arguments
77			---------
78			X : array
79			source data set (N samples by D features)
80			Z : array
81			target data set (M samples by D features)
82			num_components : int
83			number of components (def: 1)
84
85			Returns
86			-------
87			V : array
88			transformation matrix (D features by D features)
89			CX : array
90			source principal component coefficients
91			CZ : array
92			target principal component coefficients
93
94			Examples
95			--------
96			X = np.random.randn(100, 10)
97			Z = np.random.randn(100, 10)*2 + 1
98			clf = SubspaceAlignedClassifier()
99			V, CX, CZ = clf.subspace_alignment(X, Z, num_components=2)
100
101			"""
102			# Data shapes
103	1		N, DX = X.shape
104	1		M, DZ = Z.shape
105
106			# Assert equivalent dimensionalities
107	1		if not DX == DZ:
108			raise ValueError('Dimensionalities of X and Z should be equal.')
109
110			# Compute principal components
111	1		CX = PCA(n_components=num_components, whiten=True).fit(X).components_.T
112	1		CZ = PCA(n_components=num_components, whiten=True).fit(Z).components_.T
113
114			# Aligned source components
115	1		V = np.dot(CX.T, CZ)
116
117			# Return transformation matrix and principal component coefficients
118	1		return V, CX, CZ
119
120	1		def fit(self, X, y, Z):
121			"""
122			Fit/train a classifier on data mapped onto transfer components.
123
124			Arguments
125			X : array
126			source data (N samples by D features)
127			y : array
128			source labels (N samples by 1)
129			Z : array
130			target data (M samples by D features)
131
132			Returns
133			-------
134			None
135
136			Examples
137			--------
138			X = np.random.randn(10, 2)
139			y = np.vstack((-np.ones((5,)), np.ones((5,))))
140			Z = np.random.randn(10, 2)
141			clf = SubspaceAlignedClassifier()
142			clf.fit(X, y, Z)
143
144			"""
145			# Data shapes
146	1		N, DX = X.shape
147	1		M, DZ = Z.shape
148
149			# Assert equivalent dimensionalities
150	1		if not DX == DZ:
151			raise ValueError('Dimensionalities of X and Z should be equal.')
152
153			# Transfer component analysis
154	1		V, CX, CZ = self.subspace_alignment(X, Z,
155			num_components=self.num_components)
156
157			# Store target subspace
158	1		self.target_subspace = CZ
159
160			# Map source data onto source principal components
161	1		X = np.dot(X, CX)
162
163			# Align source data to target subspace
164	1		X = np.dot(X, V)
165
166			# Train a weighted classifier
167	1		if self.loss == 'logistic':
168			# Logistic regression model with sample weights
169	1		self.clf.fit(X, y)
170			elif self.loss == 'quadratic':
171			# Least-squares model with sample weights
172			self.clf.fit(X, y)
173			elif self.loss == 'hinge':
174			# Linear support vector machine with sample weights
175			self.clf.fit(X, y)
176			else:
177			# Other loss functions are not implemented
178			raise NotImplementedError
179
180			# Mark classifier as trained
181	1		self.is_trained = True
182
183			# Store training data dimensionality
184	1		self.train_data_dim = DX
185
186	1		def predict(self, Z_, whiten=False):
187			"""
188			Make predictions on new dataset.
189
190			Arguments
191			---------
192			Z_ : array
193			new data set (M samples by D features)
194			whiten : boolean
195			whether to whiten new data (def: false)
196
197			Returns
198			-------
199			preds : array
200			label predictions (M samples by 1)
201
202			Examples
203			--------
204			X = np.random.randn(10, 2)
205			y = np.vstack((-np.ones((5,)), np.ones((5,))))
206			Z = np.random.randn(10, 2)
207			clf = SubspaceAlignedClassifier()
208			clf.fit(X, y, Z)
209			preds = clf.predict(Z)
210
211			"""
212			# Data shape
213	1		M, D = Z_.shape
214
215			# If classifier is trained, check for same dimensionality
216	1		if self.is_trained:
217	1		assert self.train_data_dim == D
218
219			# Check for need to whiten data beforehand
220	1		if whiten:
221			Z_ = st.zscore(Z_)
222
223			# Map new target data onto target subspace
224	1		Z_ = np.dot(Z_, self.target_subspace)
225
226			# Call scikit's predict function
227	1		preds = self.clf.predict(Z_)
228
229			# For quadratic loss function, correct predictions
230	1		if self.loss == 'quadratic':
231			preds = (np.sign(preds)+1)/2.
232
233			# Return predictions array
234	1		return preds
235
236	1		def get_params(self):
237			"""Get classifier parameters."""
238			return self.clf.get_params()
239
240	1		def is_trained(self):
241			"""Check whether classifier is trained."""
242			return self.is_trained
243

wmkouw / libTLDA

Push — master ( fe60a9...f3d068 )

SubspaceAlignedClassifier.subspace_alignment() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like