#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import scipy.stats as st
from scipy.sparse.linalg import eigs
from scipy.spatial.distance import cdist
import sklearn as sk
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename

from .util import is_pos_def


class SubspaceAlignedClassifier(object):
    """
    Class of classifiers based on Subspace Alignment.

    Methods contain the alignment itself, classifiers and general utilities.
    """
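
    # The approach follows Fernando et al. (2013), "Unsupervised Visual
    # Domain Adaptation Using Subspace Alignment": compute principal
    # component bases for source and target data, align the source basis to
    # the target basis, train on the aligned source data, and predict on
    # target data projected onto its own subspace.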

    def __init__(self, loss='logistic', l2=1.0, num_components=1):
        """
        Select a particular type of subspace aligned classifier.

        INPUT   (1) str 'loss': loss function for the classifier, options:
                    'logistic', 'quadratic', 'hinge' (def: 'logistic')
                (2) float 'l2': l2-regularization parameter value (def: 1.0)
                (3) int 'num_components': number of transfer components to
                    maintain (def: 1)
        """
        self.loss = loss
        self.l2 = l2
        self.num_components = num_components

        # Initialize untrained classifiers; in scikit-learn, C is the
        # inverse of the l2-regularization strength
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression(C=1. / l2)
        elif self.loss == 'quadratic':
            # Least-squares model (no l2-regularization applied here)
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC(C=1. / l2)
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Maintain target principal component coefficients
        self.CZ = None

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = None

    def subspace_alignment(self, X, Z, num_components=1):
        """
        Compute subspace and alignment matrix.

        INPUT   (1) array 'X': source data set (N samples by D features)
                (2) array 'Z': target data set (M samples by D features)
                (3) int 'num_components': number of components (def: 1)
        OUTPUT  (1) array 'V': alignment matrix (num_components by
                    num_components)
                (2) array 'CX': source principal component coefficients
                (3) array 'CZ': target principal component coefficients
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ

        # Compute principal components of source and target data
        CX = PCA(n_components=num_components, whiten=True).fit(X).components_.T
        CZ = PCA(n_components=num_components, whiten=True).fit(Z).components_.T

        # Aligned source components
        V = np.dot(CX.T, CZ)
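        # This alignment matrix is the closed-form minimizer of the
        # Frobenius norm ||CX*M - CZ||_F over M: since the columns of CX are
        # orthonormal, the least-squares solution (CX'CX)^{-1} CX'CZ reduces
        # to CX'CZ (Fernando et al., 2013).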

        # Return transformation matrix and principal component coefficients
        return V, CX, CZ

    def fit(self, X, y, Z):
        """
        Fit/train a classifier on data mapped onto transfer components.

        INPUT   (1) array 'X': source data (N samples by D features)
                (2) array 'y': source labels (N samples by 1)
                (3) array 'Z': target data (M samples by D features)
        OUTPUT  None
        """
        # Data shapes
        N, DX = X.shape
        M, DZ = Z.shape

        # Assert equivalent dimensionalities
        assert DX == DZ

        # Subspace alignment (store target subspace for prediction)
        V, CX, self.CZ = self.subspace_alignment(
            X, Z, num_components=self.num_components)

        # Map source data onto source principal components
        X = np.dot(X, CX)

        # Align source data to target subspace
        X = np.dot(X, V)
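        # Net effect: source samples are represented as X * CX * (CX'CZ),
        # i.e., projected onto the source subspace and then rotated into the
        # target subspace, so source and target data share a common
        # num_components-dimensional representation.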

        # Train the classifier on the aligned source data; the same fit call
        # applies to every loss function, since self.clf was set in __init__
        self.clf.fit(X, y)

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = DX

    def predict(self, Z_, whiten=False):
        """
        Make predictions on new dataset.

        INPUT   (1) array 'Z_': new data set (M samples by D features)
                (2) boolean 'whiten': whether to whiten new data (def: False)
        OUTPUT  (1) array 'preds': label predictions (M samples by 1)
        """
        # Data shape
        M, D = Z_.shape

        # If classifier is trained, check for same dimensionality
        if self.is_trained:
            assert self.train_data_dim == D

        # Whiten the new data beforehand, if requested
        if whiten:
            Z_ = st.zscore(Z_)

        # Map new target data onto target subspace
        Z_ = np.dot(Z_, self.CZ)
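        # Note: new samples are projected onto the stored *target*
        # components; the classifier was trained on source data already
        # rotated into this target subspace during fit.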

        # Call scikit-learn's predict function
        preds = self.clf.predict(Z_)

        # For quadratic loss, the regressor returns real values; map their
        # sign to {0, 1} label predictions (assumes binary 0/1 labels)
        if self.loss == 'quadratic':
            preds = (np.sign(preds) + 1) / 2.

        # Return predictions array
        return preds

    def get_params(self):
        """Get classifier parameters."""
        return self.clf.get_params()

    def get_is_trained(self):
        """Check whether classifier is trained."""
        # Renamed from is_trained: the boolean attribute self.is_trained set
        # in __init__ shadows an instance method of the same name, so the
        # original method could never be called on a fitted instance.
        return self.is_trained
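

# Example usage (a minimal sketch: the import path, synthetic data and
# variable names below are illustrative assumptions, not part of this module):
#
#     import numpy as np
#     from mypackage.suba import SubspaceAlignedClassifier  # hypothetical path
#
#     rnd = np.random.RandomState(1)
#     X = rnd.randn(100, 5)                  # source samples
#     y = (X[:, 0] > 0).astype(int)          # binary source labels in {0, 1}
#     Z = rnd.randn(80, 5) + 0.5             # covariate-shifted target samples
#
#     clf = SubspaceAlignedClassifier(loss='logistic', num_components=2)
#     clf.fit(X, y, Z)
#     preds = clf.predict(Z)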