Completed
Push — master ( 582254...17fb6a )
by Wouter
03:58
created

SubspaceAlignedClassifier.subspace_alignment()   B

Complexity

Conditions 2

Size

Total Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 2.0054

Importance

Changes 0
Metric Value
cc 2
c 0
b 0
f 0
dl 0
loc 28
ccs 8
cts 9
cp 0.8889
crap 2.0054
rs 8.8571
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4 1
import numpy as np
5 1
import scipy.stats as st
6 1
from scipy.sparse.linalg import eigs
7 1
from scipy.spatial.distance import cdist
8 1
import sklearn as sk
9 1
from sklearn.decomposition import PCA
10 1
from sklearn.svm import LinearSVC
11 1
from sklearn.linear_model import LogisticRegression, LinearRegression
12 1
from sklearn.model_selection import cross_val_predict
13 1
from os.path import basename
14
15 1
from .util import is_pos_def
16
17
18 1
class SubspaceAlignedClassifier(object):
19
    """
20
    Class of classifiers based on Subspace Alignment.
21
22
    Methods contain the alignment itself, classifiers and general utilities.
23
    """
24
25 1 View Code Duplication
    def __init__(self, loss='logistic', l2=1.0, num_components=1):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
26
        """
27
        Select a particular type of subspace aligned classifier.
28
29
        INPUT   (1) str 'loss': loss function for weighted classifier, options:
30
                    'logistic', 'quadratic', 'hinge' (def: 'logistic')
31
                (2) float 'l2': l2-regularization parameter value (def:0.01)
32
                (3) int 'num_components': number of transfer components to
33
                    maintain (def: 1)
34
        """
35 1
        self.loss = loss
36 1
        self.l2 = l2
37 1
        self.num_components = num_components
38
39
        # Initialize untrained classifiers
40 1
        if self.loss == 'logistic':
41
            # Logistic regression model
42 1
            self.clf = LogisticRegression()
43
        elif self.loss == 'quadratic':
44
            # Least-squares model
45
            self.clf = LinearRegression()
46
        elif self.loss == 'hinge':
47
            # Linear support vector machine
48
            self.clf = LinearSVC()
49
        else:
50
            # Other loss functions are not implemented
51
            raise NotImplementedError
52
53
        # Maintain target principal component coefficients
54 1
        self.CZ = ''
55
56
        # Whether model has been trained
57 1
        self.is_trained = False
58
59
        # Dimensionality of training data
60 1
        self.train_data_dim = ''
61
62 1
    def subspace_alignment(self, X, Z, num_components=1):
63
        """
64
        Compute subspace and alignment matrix.
65
66
        INPUT   (1) array 'X': source data set (N samples by D features)
67
                (2) array 'Z': target data set (M samples by D features)
68
                (3) int 'num_components': number of components (def: 1)
69
        OUTPUT  (1) array 'V': transformation matrix (D features by D features)
70
                (2) array 'CX': source principal component coefficients
71
                (3) array 'CZ': target principal component coefficients
72
        """
73
        # Data shapes
74 1
        N, DX = X.shape
75 1
        M, DZ = Z.shape
76
77
        # Assert equivalent dimensionalities
78 1
        if not DX == DZ:             
79
            raise ValueError('Dimensionalities of X and Z should be equal.')
80
81
        # Compute principal components
82 1
        CX = PCA(n_components=num_components, whiten=True).fit(X).components_.T
83 1
        CZ = PCA(n_components=num_components, whiten=True).fit(Z).components_.T
84
85
        # Aligned source components
86 1
        V = np.dot(CX.T, CZ)
87
88
        # Return transformation matrix and principal component coefficients
89 1
        return V, CX, CZ
90
91 1 View Code Duplication
    def fit(self, X, y, Z):
0 ignored issues
show
Duplication introduced
This code seems to be duplicated in your project.
Loading history...
92
        """
93
        Fit/train a classifier on data mapped onto transfer components.
94
95
        INPUT   (1) array 'X': source data (N samples by D features)
96
                (2) array 'y': source labels (N samples by 1)
97
                (3) array 'Z': target data (M samples by D features)
98
        OUTPUT  None
99
        """
100
        # Data shapes
101 1
        N, DX = X.shape
102 1
        M, DZ = Z.shape
103
104
        # Assert equivalent dimensionalities
105 1
        if not DX == DZ:             raise ValueError('Dimensionalities of X and Z should be equal.')
106
107
        # Transfer component analysis (store target subspace)
108 1
        V, CX, self.CZ = self.subspace_alignment(X, Z, num_components=self.
109
                                                 num_components)
110
111
        # Map source data onto source principal components
112 1
        X = np.dot(X, CX)
113
114
        # Align source data to target subspace
115 1
        X = np.dot(X, V)
116
117
        # Train a weighted classifier
118 1
        if self.loss == 'logistic':
119
            # Logistic regression model with sample weights
120 1
            self.clf.fit(X, y)
121
        elif self.loss == 'quadratic':
122
            # Least-squares model with sample weights
123
            self.clf.fit(X, y)
124
        elif self.loss == 'hinge':
125
            # Linear support vector machine with sample weights
126
            self.clf.fit(X, y)
127
        else:
128
            # Other loss functions are not implemented
129
            raise NotImplementedError
130
131
        # Mark classifier as trained
132 1
        self.is_trained = True
133
134
        # Store training data dimensionality
135 1
        self.train_data_dim = DX
136
137 1
    def predict(self, Z_, whiten=False):
138
        """
139
        Make predictions on new dataset.
140
141
        INPUT   (1) array 'Z_': new data set (M samples by D features)
142
                (2) boolean 'whiten': whether to whiten new data (def: false)
143
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
144
        """
145
        # Data shape
146 1
        M, D = Z_.shape
147
148
        # If classifier is trained, check for same dimensionality
149 1
        if self.is_trained:
150 1
            assert self.train_data_dim == D
151
152
        # Check for need to whiten data beforehand
153 1
        if whiten:
154
            Z_ = st.zscore(Z_)
155
156
        # Map new target data onto target subspace
157 1
        Z_ = np.dot(Z_, self.CZ)
158
159
        # Call scikit's predict function
160 1
        preds = self.clf.predict(Z_)
161
162
        # For quadratic loss function, correct predictions
163 1
        if self.loss == 'quadratic':
164
            preds = (np.sign(preds)+1)/2.
165
166
        # Return predictions array
167 1
        return preds
168
169 1
    def get_params(self):
170
        """Get classifier parameters."""
171
        return self.clf.get_params()
172
173 1
    def is_trained(self):
174
        """Check whether classifier is trained."""
175
        return self.is_trained
176