Completed
Push — master ( 748d0e...582254 )
by Wouter
03:55
created

SubspaceAlignedClassifier.is_trained()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1.125

Importance

Changes 0
Metric Value
cc 1
c 0
b 0
f 0
dl 0
loc 3
ccs 1
cts 2
cp 0.5
crap 1.125
rs 10
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4 1
import numpy as np
5 1
import scipy.stats as st
6 1
from scipy.sparse.linalg import eigs
7 1
from scipy.spatial.distance import cdist
8 1
import sklearn as sk
9 1
from sklearn.decomposition import PCA
10 1
from sklearn.svm import LinearSVC
11 1
from sklearn.linear_model import LogisticRegression, LinearRegression
12 1
from sklearn.model_selection import cross_val_predict
13 1
from os.path import basename
14
15 1
from .util import is_pos_def
16
17
18 1
class SubspaceAlignedClassifier(object):
    """
    Class of classifiers based on Subspace Alignment.

    Methods contain the alignment itself, classifiers and general utilities.
    """

    # Loss names accepted by the constructor
    _SUPPORTED_LOSSES = ('logistic', 'quadratic', 'hinge')

    def __init__(self, loss='logistic', l2=1.0, num_components=1):
        """
        Select a particular type of subspace aligned classifier.

        INPUT   (1) str 'loss': loss function for the classifier, options:
                    'logistic', 'quadratic', 'hinge' (def: 'logistic')
                (2) float 'l2': l2-regularization parameter value (def: 1.0);
                    stored on the instance only, it is not passed on to the
                    underlying scikit-learn estimator
                (3) int 'num_components': number of principal components to
                    maintain (def: 1)
        RAISES  NotImplementedError if 'loss' is not one of the options
        """
        self.loss = loss
        self.l2 = l2
        self.num_components = num_components

        # Initialize untrained classifiers
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError

        # Target principal component coefficients (placeholder until fit)
        self.CZ = ''

        # Whether model has been trained. Kept under a private name: an
        # instance attribute called 'is_trained' would shadow the
        # is_trained() method and make it uncallable.
        self._is_trained = False

        # Dimensionality of training data (placeholder until fit)
        self.train_data_dim = ''

    def subspace_alignment(self, X, Z, num_components=1):
        """
        Compute subspace and alignment matrix.

        INPUT   (1) array 'X': source data set (N samples by D features)
                (2) array 'Z': target data set (M samples by D features)
                (3) int 'num_components': number of components (def: 1)
        OUTPUT  (1) array 'V': alignment matrix (num_components by
                    num_components)
                (2) array 'CX': source principal component coefficients
                    (D features by num_components)
                (3) array 'CZ': target principal component coefficients
                    (D features by num_components)
        """
        # Data shapes (unpacking also enforces 2-dimensional input)
        _, DX = X.shape
        _, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ, 'Source and target dimensionalities differ.'

        # Compute principal components; transposed so that columns are
        # components
        CX = PCA(n_components=num_components, whiten=True).fit(X).components_.T
        CZ = PCA(n_components=num_components, whiten=True).fit(Z).components_.T

        # Alignment of source components to target components
        V = np.dot(CX.T, CZ)

        # Return alignment matrix and principal component coefficients
        return V, CX, CZ

    def fit(self, X, y, Z):
        """
        Fit/train a classifier on source data aligned to the target subspace.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes (unpacking also enforces 2-dimensional input)
        _, DX = X.shape
        _, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ, 'Source and target dimensionalities differ.'

        # Subspace alignment (store target subspace for use in predict)
        V, CX, self.CZ = self.subspace_alignment(
            X, Z, num_components=self.num_components)

        # Map source data onto source principal components
        X = np.dot(X, CX)

        # Align source data to target subspace
        X = np.dot(X, V)

        # Guard against 'loss' having been changed after construction
        if self.loss not in self._SUPPORTED_LOSSES:
            # Other loss functions are not implemented
            raise NotImplementedError

        # All supported estimators share the same training call
        self.clf.fit(X, y)

        # Mark classifier as trained
        self._is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_, whiten=False):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
                (2) boolean 'whiten': whether to z-score new data before
                    projection (def: false)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        RAISES  RuntimeError if called before fit
        """
        # Data shape
        _, D = Z_.shape

        # Without a fitted target subspace there is nothing to project onto
        if not self._is_trained:
            raise RuntimeError('Classifier has not been trained yet; '
                               'call fit before predict.')

        # Check for same dimensionality as the training data
        assert self.train_data_dim == D, \
            'New data dimensionality differs from training data.'

        # Check for need to whiten data beforehand
        if whiten:
            Z_ = st.zscore(Z_)

        # Map new target data onto target subspace
        Z_ = np.dot(Z_, self.CZ)

        # Call scikit's predict function
        preds = self.clf.predict(Z_)

        # For quadratic loss function, map the sign of the regression
        # output onto {0, 0.5, 1}
        if self.loss == 'quadratic':
            preds = (np.sign(preds) + 1) / 2.

        # Return predictions array
        return preds

    def get_params(self):
        """Get classifier parameters."""
        return self.clf.get_params()

    def is_trained(self):
        """Check whether classifier is trained."""
        return self._is_trained
175