Completed
Push — master ( fe60a9...f3d068 )
by Wouter
04:03
created

SubspaceAlignedClassifier.subspace_alignment()   A

Complexity

Conditions 2

Size

Total Lines 47

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 2.0054

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 47
ccs 8
cts 9
cp 0.8889
crap 2.0054
rs 9.0303
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4 1
import numpy as np
5 1
import scipy.stats as st
6 1
from scipy.sparse.linalg import eigs
7 1
from scipy.spatial.distance import cdist
8 1
import sklearn as sk
9 1
from sklearn.decomposition import PCA
10 1
from sklearn.svm import LinearSVC
11 1
from sklearn.linear_model import LogisticRegression, LinearRegression
12 1
from sklearn.model_selection import cross_val_predict
13 1
from os.path import basename
14
15 1
from .util import is_pos_def
16
17
18 1
class SubspaceAlignedClassifier(object):
    """
    Class of classifiers based on Subspace Alignment.

    Methods contain the alignment itself, classifiers and general utilities.

    The source data is projected onto its own principal components, aligned
    to the principal components of the target data, and a linear classifier
    is trained on the aligned source data. New target data is projected onto
    the target principal components before prediction.

    Attributes
    ----------
    loss : str
        name of the loss function selected at construction
    l2 : float
        l2-regularization parameter value as passed to the constructor
    num_components : int
        number of principal components used for both subspaces
    clf : object
        underlying scikit-learn estimator
    is_trained : bool
        whether `fit` has completed
    train_data_dim : int or str
        number of features seen by `fit` (empty string before training)
    target_subspace : array
        target principal components, set by `fit`
    """

    # Loss names for which a classifier can be trained.
    _LOSSES = ('logistic', 'quadratic', 'hinge')

    def __init__(self, loss='logistic', l2=1.0, num_components=1):
        """
        Select a particular type of subspace aligned classifier.

        Arguments
        ---------
        loss : str
            loss function for the classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0)
            NOTE: the value is stored on the instance but is not forwarded
            to the underlying scikit-learn estimator, which is constructed
            with its own defaults.
        num_components : int
            number of principal components to maintain (def: 1)

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            if `loss` is not one of the supported options

        Examples
        --------
        clf = SubspaceAlignedClassifier(loss='hinge', l2=0.1)

        """
        self.loss = loss
        self.l2 = l2
        self.num_components = num_components

        # Initialize untrained classifiers
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = ''

    @property
    def is_trained(self):
        """
        Check whether classifier is trained.

        Exposed as a property: the flag has always been stored as an instance
        attribute of the same name, which made the former `is_trained()`
        method impossible to call. Attribute-style reads and writes behave
        exactly as before.
        """
        return getattr(self, '_is_trained', False)

    @is_trained.setter
    def is_trained(self, value):
        """Set the trained flag."""
        self._is_trained = value

    def subspace_alignment(self, X, Z, num_components=1):
        """
        Compute subspace and alignment matrix.

        Arguments
        ---------
        X : array
            source data set (N samples by D features)
        Z : array
            target data set (M samples by D features)
        num_components : int
            number of components (def: 1)

        Returns
        -------
        V : array
            alignment matrix mapping source components onto target
            components (num_components by num_components)
        CX : array
            source principal component coefficients
            (D features by num_components)
        CZ : array
            target principal component coefficients
            (D features by num_components)

        Raises
        ------
        ValueError
            if X and Z do not have the same number of features

        Examples
        --------
        X = np.random.randn(100, 10)
        Z = np.random.randn(100, 10)*2 + 1
        clf = SubspaceAlignedClassifier()
        V, CX, CZ = clf.subspace_alignment(X, Z, num_components=2)

        """
        # Data shapes (unpacking also enforces that both arrays are 2-D)
        _, DX = X.shape
        _, DZ = Z.shape

        # Assert equivalent dimensionalities
        if DX != DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Compute principal components; components_ is stored row-wise, so
        # transpose to obtain one component per column
        CX = PCA(n_components=num_components, whiten=True).fit(X).components_.T
        CZ = PCA(n_components=num_components, whiten=True).fit(Z).components_.T

        # Aligned source components
        V = np.dot(CX.T, CZ)

        # Return transformation matrix and principal component coefficients
        return V, CX, CZ

    def fit(self, X, y, Z):
        """
        Fit/train a classifier on source data aligned to the target subspace.

        Arguments
        ---------
        X : array
            source data (N samples by D features)
        y : array
            source labels (N samples by 1)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if X and Z do not have the same number of features
        NotImplementedError
            if `self.loss` is not one of the supported options

        Examples
        --------
        X = np.random.randn(10, 2)
        y = np.hstack((-np.ones((5,)), np.ones((5,))))
        Z = np.random.randn(10, 2)
        clf = SubspaceAlignedClassifier()
        clf.fit(X, y, Z)

        """
        # Data shapes
        _, DX = X.shape
        _, DZ = Z.shape

        # Assert equivalent dimensionalities
        if DX != DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        # Subspace alignment
        V, CX, CZ = self.subspace_alignment(
            X, Z, num_components=self.num_components)

        # Store target subspace; predict() projects new data onto it
        self.target_subspace = CZ

        # Map source data onto source principal components
        X = np.dot(X, CX)

        # Align source data to target subspace
        X = np.dot(X, V)

        # Guard against the loss having been changed after construction
        if self.loss not in self._LOSSES:
            # Other loss functions are not implemented
            raise NotImplementedError

        # Train the classifier selected in the constructor; every supported
        # estimator is trained through the same call
        self.clf.fit(X, y)

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_, whiten=False):
        """
        Make predictions on new dataset.

        Arguments
        ---------
        Z_ : array
            new data set (M samples by D features)
        whiten : boolean
            whether to z-score the new data beforehand (def: false)

        Returns
        -------
        preds : array
            label predictions (M samples by 1)

        Raises
        ------
        ValueError
            if the classifier is trained and the number of features in Z_
            differs from the number of features seen during training

        Examples
        --------
        X = np.random.randn(10, 2)
        y = np.hstack((-np.ones((5,)), np.ones((5,))))
        Z = np.random.randn(10, 2)
        clf = SubspaceAlignedClassifier()
        clf.fit(X, y, Z)
        preds = clf.predict(Z)

        """
        # Data shape
        _, D = Z_.shape

        # If classifier is trained, check for same dimensionality. Raised
        # explicitly rather than asserted so that the check survives -O.
        if self.is_trained and self.train_data_dim != D:
            raise ValueError('Dimensionality of new data should equal that '
                             'of the training data.')

        # Check for need to whiten data beforehand
        if whiten:
            Z_ = st.zscore(Z_)

        # Map new target data onto target subspace
        Z_ = np.dot(Z_, self.target_subspace)

        # Call scikit's predict function
        preds = self.clf.predict(Z_)

        # For quadratic loss function, correct predictions: the regression
        # output is thresholded at zero, negative -> 0 and positive -> 1
        # (an output of exactly zero maps to 0.5)
        if self.loss == 'quadratic':
            preds = (np.sign(preds) + 1) / 2.

        # Return predictions array
        return preds

    def get_params(self):
        """Get the parameters of the underlying scikit-learn estimator."""
        return self.clf.get_params()
243