ImportanceWeightedClassifier - Code Metrics - Inspection of "Added sphinx and readthedocs." - wmkouw/libTLDA - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f3d068...88fa67 )

by Wouter

created 2018-06-17 15:54 UTC

ImportanceWeightedClassifier B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	422
Duplicated Lines	14.22 %

Test Coverage

Coverage

68.38%

Importance

Changes

Metric	Value
c	0
b	0
f	0
dl	60
loc	422
ccs	93
cts	136
cp	0.6838
rs	8.439
wmc	47

10 Methods

Rating	Name	Duplication	Size	Complexity
B	predict()	0	33	4
A	is_trained()	0	3	1
B	iwe_logistic_discrimination()	0	40	2
B	__init__()	60	60	4
B	iwe_kernel_mean_matching()	0	58	5
B	iwe_kernel_densities()	0	37	6
B	iwe_nearest_neighbours()	0	42	4
A	get_params()	0	3	1
D	fit()	0	59	10
D	iwe_ratio_gaussians()	0	60	10

How to fix Duplicated Code Complexity

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import scipy.stats as st
from scipy.spatial.distance import cdist
import sklearn as sk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import cross_val_predict
from os.path import basename
from cvxopt import matrix, solvers

from .util import is_pos_def


class ImportanceWeightedClassifier(object):
    """
    Class of importance-weighted classifiers.

    Methods contain different importance-weight estimators and different loss
    functions. Source samples are weighted by an estimate of how likely they
    are under the target domain, and a standard scikit-learn model is then
    trained with those sample weights.

    Examples
    --------
    | >>>> X = np.random.randn(10, 2)
    | >>>> y = np.hstack((-np.ones((5,)), np.ones((5,))))
    | >>>> Z = np.random.randn(10, 2)
    | >>>> clf = ImportanceWeightedClassifier()
    | >>>> clf.fit(X, y, Z)
    | >>>> u_pred = clf.predict(Z)

    """

    # Loss functions for which __init__ knows how to build a classifier
    _LOSSES = ('logistic', 'quadratic', 'hinge')

    # Class-level default so that `is_trained` is readable even on instances
    # that were created without running __init__
    _is_trained = False

    def __init__(self, loss='logistic', l2=1.0, iwe='lr', smoothing=True,
                 clip=-1, kernel_type='rbf', bandwidth=1):
        """
        Select a particular type of importance-weighted classifier.

        Parameters
        ----------
        loss : str
            loss function for weighted classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0)
        iwe : str
            importance weight estimator, options: 'lr', 'nn', 'rg', 'kmm',
            'kde' (def: 'lr')
        smoothing : bool
            whether to apply Laplace smoothing to the nearest-neighbour
            importance-weight estimator (def: True)
        clip : float
            maximum allowable importance-weight value; if set to -1, then the
            weights are not clipped (def:-1)
        kernel_type : str
            what type of kernel to use for kernel density estimation or kernel
            mean matching, options: 'diste', 'rbf' (def: 'rbf')
        bandwidth : float
            kernel bandwidth parameter value for kernel-based weight
            estimators (def: 1)

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            if `loss` is not one of the supported loss functions.

        """
        self.loss = loss
        self.l2 = l2
        self.iwe = iwe
        self.smoothing = smoothing
        self.clip = clip
        self.kernel_type = kernel_type
        self.bandwidth = bandwidth

        # Initialize untrained classifiers based on choice of loss function.
        # NOTE(review): `l2` is not forwarded to these models; within this
        # class only the 'lr' weight estimator reads it.
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data (empty string until fit is called)
        self.train_data_dim = ''

    @property
    def is_trained(self):
        """Check whether classifier is trained (bool)."""
        return self._is_trained

    @is_trained.setter
    def is_trained(self, value):
        # Kept writable so existing code that assigns the flag keeps working
        self._is_trained = bool(value)

    @staticmethod
    def _check_dims(X, Z):
        """Return (N, M, D) for X (N by D) and Z (M by D); raise on mismatch."""
        N, DX = X.shape
        M, DZ = Z.shape

        if not DX == DZ:
            raise ValueError('Dimensionalities of X and Z should be equal.')

        return N, M, DX

    def iwe_ratio_gaussians(self, X, Z):
        """
        Estimate importance weights based on a ratio of Gaussian distributions.

        A Gaussian is fitted to each domain and the weight of a source sample
        is the target density divided by the source density at that sample.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        iw : array
            importance weights (N samples by 1)

        Raises
        ------
        ValueError
            if the dimensionalities differ, or if a density evaluates to NaN
            or 0 for any source sample.

        """
        # Data shapes, with dimensionality check
        _, _, D = self._check_dims(X, Z)

        # Sample means in each domain
        mu_X = np.mean(X, axis=0)
        mu_Z = np.mean(Z, axis=0)

        # Sample covariances; np.cov returns a 0-d array for D == 1, so force
        # a (D by D) matrix to keep the regularization below well-defined
        Si_X = np.atleast_2d(np.cov(X.T))
        Si_Z = np.atleast_2d(np.cov(Z.T))

        # Both covariance matrices have to be positive-definite
        if not (is_pos_def(Si_X) and is_pos_def(Si_Z)):
            print('Warning: covariate matrices not PSD.')

            regct = -6
            while not (is_pos_def(Si_X) and is_pos_def(Si_Z)):
                # Current amount of diagonal loading
                reg = 10.**regct
                print('Adding regularization: ' + str(reg))

                # Add regularization
                Si_X += np.eye(D)*reg
                Si_Z += np.eye(D)*reg

                # Increment regularization counter
                regct += 1

        # Compute probability of X under each domain
        pT = st.multivariate_normal.pdf(X, mu_Z, Si_Z)
        pS = st.multivariate_normal.pdf(X, mu_X, Si_X)

        # Check for numerical problems
        if np.any(np.isnan(pT)) or np.any(pT == 0):
            raise ValueError('Target probabilities are NaN or 0.')
        if np.any(np.isnan(pS)) or np.any(pS == 0):
            raise ValueError('Source probabilities are NaN or 0.')

        # Return the ratio of probabilities
        return pT / pS

    def iwe_kernel_densities(self, X, Z):
        """
        Estimate importance weights based on kernel density estimation.

        A Gaussian kernel density estimate is fitted to each domain and the
        weight of a source sample is the target density divided by the source
        density at that sample.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        array
            importance weights (N samples by 1)

        Raises
        ------
        ValueError
            if the dimensionalities differ, or if a density evaluates to NaN
            or 0 for any source sample.

        """
        # Dimensionality check
        self._check_dims(X, Z)

        # Evaluate the target and source kernel densities on the source data
        pT = st.gaussian_kde(Z.T).pdf(X.T)
        pS = st.gaussian_kde(X.T).pdf(X.T)

        # Check for numerical problems
        if np.any(np.isnan(pT)) or np.any(pT == 0):
            raise ValueError('Target probabilities are NaN or 0.')
        if np.any(np.isnan(pS)) or np.any(pS == 0):
            raise ValueError('Source probabilities are NaN or 0.')

        # Return the ratio of probabilities
        return pT / pS

    def iwe_logistic_discrimination(self, X, Z):
        """
        Estimate importance weights based on logistic regression.

        A logistic regressor is trained to discriminate source (label 0) from
        target (label 1) samples. The weight of a source sample is its
        cross-validated probability of belonging to the target domain.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        array
            importance weights (N samples by 1)

        Raises
        ------
        ValueError
            if the dimensionalities differ.

        """
        # Data shapes, with dimensionality check
        N, M, _ = self._check_dims(X, Z)

        # Make domain-label variable: 0 for source, 1 for target
        y = np.concatenate((np.zeros((N,)), np.ones((M,))), axis=0)

        # Concatenate data
        XZ = np.concatenate((X, Z), axis=0)

        # Call a logistic regressor
        # NOTE(review): scikit-learn's C is an inverse regularization
        # strength, whereas l2 is documented as a regularization parameter;
        # confirm whether C=1/l2 was intended.
        lr = LogisticRegression(C=self.l2)

        # Predict probability of belonging to target using cross-validation;
        # the default method ('predict') would return hard 0/1 labels instead
        proba = cross_val_predict(lr, XZ, y, method='predict_proba')

        # Columns follow the sorted class labels, so column 1 is the target
        # domain; return it for the source samples only
        return proba[:N, 1]

    def iwe_nearest_neighbours(self, X, Z):
        """
        Estimate importance weights based on nearest-neighbours.

        The weight of a source sample is the number of target samples for
        which it is the nearest source sample (i.e. that fall in its Voronoi
        cell), optionally Laplace-smoothed and clipped.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        iw : array
            importance weights (N samples by 1)

        Raises
        ------
        ValueError
            if the dimensionalities differ.

        """
        # Data shapes, with dimensionality check
        N, _, _ = self._check_dims(X, Z)

        # Compute Euclidean distance between samples (N by M)
        d = cdist(X, Z, metric='euclidean')

        # For every target sample, find the index of the nearest source sample
        nearest_source = np.argmin(d, axis=0)

        # Count target samples within each source Voronoi cell
        iw = np.bincount(nearest_source, minlength=N)

        # Laplace smoothing
        if self.smoothing:
            iw = (iw + 1.) / (N + 1)

        # Weight clipping
        if self.clip > 0:
            iw = np.minimum(self.clip, np.maximum(0, iw))

        # Return weights
        return iw

    def iwe_kernel_mean_matching(self, X, Z):
        """
        Estimate importance weights based on kernel mean matching.

        Solves the quadratic program

            minimize    1/2 b' K b - kappa' b
            subject to  b >= 0,  |sum(b) - N| <= sqrt(N)

        where K is the source kernel matrix and kappa is the (N/M)-scaled
        row-sum of the source-target kernel matrix.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        iw : array
            importance weights (N samples by 1)

        Raises
        ------
        ValueError
            if the dimensionalities differ, or if a pairwise distance is
            negative or NaN.

        """
        # Data shapes, with dimensionality check
        N, M, _ = self._check_dims(X, Z)

        # Compute sample pairwise distances
        KXX = cdist(X, X, metric='euclidean')
        KXZ = cdist(X, Z, metric='euclidean')

        # Check non-negative distances
        if not np.all(KXX >= 0):
            raise ValueError('Non-positive distance in source kernel.')
        if not np.all(KXZ >= 0):
            raise ValueError('Non-positive distance in source-target kernel.')

        # Compute kernels; any other kernel type uses the raw distances
        if self.kernel_type == 'rbf':
            # Radial basis functions
            KXX = np.exp(-KXX / (2*self.bandwidth**2))
            KXZ = np.exp(-KXZ / (2*self.bandwidth**2))

        # Collapse second kernel and normalize (float() avoids integer
        # division of the sample counts under Python 2)
        kappa = float(N) / M * np.sum(KXZ, axis=1)

        # Allowed deviation of the weight sum from N (N/sqrt(N) == sqrt(N))
        slack = np.sqrt(N)

        # Prepare for CVXOPT, which minimizes 1/2 x'Qx + p'x s.t. Gx <= h;
        # the linear term of the matching objective is therefore -kappa
        Q = matrix(KXX, tc='d')
        p = matrix(-kappa, tc='d')
        G = matrix(np.concatenate((np.ones((1, N)),
                                   -1.*np.ones((1, N)),
                                   -1.*np.eye(N)), axis=0), tc='d')
        h = matrix(np.concatenate((np.array([slack + N], ndmin=2),
                                   np.array([slack - N], ndmin=2),
                                   np.zeros((N, 1))), axis=0), tc='d')

        # Call quadratic program solver
        sol = solvers.qp(Q, p, G, h)

        # Return optimal coefficients as importance weights
        return np.array(sol['x'])[:, 0]

    def fit(self, X, y, Z):
        """
        Fit/train an importance-weighted classifier.

        Parameters
        ----------
        X : array
            source data (N samples by D features)
        y : array
            source labels (N samples by 1)
        Z : array
            target data (M samples by D features)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if the dimensionalities of X and Z differ.
        NotImplementedError
            if the selected weight estimator or loss function is unknown.

        """
        # Data shapes, with dimensionality check
        _, _, D = self._check_dims(X, Z)

        # Available importance-weight estimators
        estimators = {'lr': self.iwe_logistic_discrimination,
                      'rg': self.iwe_ratio_gaussians,
                      'nn': self.iwe_nearest_neighbours,
                      'kde': self.iwe_kernel_densities,
                      'kmm': self.iwe_kernel_mean_matching}

        if self.iwe not in estimators:
            raise NotImplementedError('Estimator not implemented.')

        # Guards against `loss` having been altered after construction
        if self.loss not in self._LOSSES:
            raise NotImplementedError('Loss function not implemented.')

        # Find importance-weights
        w = estimators[self.iwe](X, Z)

        # Flatten column-vector labels to the 1-d form scikit-learn expects
        y = np.asarray(y)
        if y.ndim == 2 and y.shape[1] == 1:
            y = y[:, 0]

        # Train the classifier with the importance weights as sample weights
        self.clf.fit(X, y, sample_weight=w)

        # Mark classifier as trained
        self.is_trained = True

        # Store training data dimensionality
        self.train_data_dim = D

    def predict(self, Z):
        """
        Make predictions on new dataset.

        Parameters
        ----------
        Z : array
            new data set (M samples by D features)

        Returns
        -------
        preds : array
            label predictions (M samples by 1)

        Raises
        ------
        ValueError
            if the classifier is trained and Z has a different number of
            features than the training data.

        """
        # Data shape
        _, D = Z.shape

        # If classifier is trained, check for same dimensionality
        if self.is_trained and not self.train_data_dim == D:
            raise ValueError('Test data is of different dimensionality '
                             'than training data.')

        # Call scikit's predict function
        preds = self.clf.predict(Z)

        # For quadratic loss function, map the sign of the regression output
        # onto {0, 0.5, 1}.
        # NOTE(review): the other losses return the original label values;
        # confirm this 0/1 coding is what callers expect.
        if self.loss == 'quadratic':
            preds = (np.sign(preds)+1)/2.

        # Return predictions array
        return preds

    def get_params(self):
        """Get classifier parameters."""
        return self.clf.get_params()


1			#!/usr/bin/env python
2			# -*- coding: utf-8 -*-
3
4	1		import numpy as np
5	1		import scipy.stats as st
6	1		from scipy.spatial.distance import cdist
7	1		import sklearn as sk
8	1		from sklearn.svm import LinearSVC
9	1		from sklearn.linear_model import LogisticRegression, LinearRegression
10	1		from sklearn.model_selection import cross_val_predict
11	1		from os.path import basename
12	1		from cvxopt import matrix, solvers
13
14	1		from .util import is_pos_def
15
16
17	1		class ImportanceWeightedClassifier(object):
18			"""
19			Class of importance-weighted classifiers.
20
21			Methods contain different importance-weight estimators and different loss
22			functions.
23
24			Examples
25			--------
26			\| >>>> X = np.random.randn(10, 2)
27			\| >>>> y = np.vstack((-np.ones((5,)), np.ones((5,))))
28			\| >>>> Z = np.random.randn(10, 2)
29			\| >>>> clf = ImportanceWeightedClassifier()
30			\| >>>> clf.fit(X, y, Z)
31			\| >>>> u_pred = clf.predict(Z)
32
33			"""
34
35	1	View Code Duplication	def __init__(self, loss='logistic', l2=1.0, iwe='lr', smoothing=True,
			0 ignored issues – show Duplication introduced 2018-06-12 14:41 UTC Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
36			clip=-1, kernel_type='rbf', bandwidth=1):
37			"""
38			Select a particular type of importance-weighted classifier.
39
40			Parameters
41			----------
42			loss : str
43			loss function for weighted classifier, options: 'logistic',
44			'quadratic', 'hinge' (def: 'logistic')
45			l2 : float
46			l2-regularization parameter value (def:0.01)
47			iwe : str
48			importance weight estimator, options: 'lr', 'nn', 'rg', 'kmm',
49			'kde' (def: 'lr')
50			smoothing : bool
51			whether to apply Laplace smoothing to the nearest-neighbour
52			importance-weight estimator (def: True)
53			clip : float
54			maximum allowable importance-weight value; if set to -1, then the
55			weights are not clipped (def:-1)
56			kernel_type : str
57			what type of kernel to use for kernel density estimation or kernel
58			mean matching, options: 'diste', 'rbf' (def: 'rbf')
59			bandwidth : float
60			kernel bandwidth parameter value for kernel-based weight
61			estimators (def: 1)
62
63			Returns
64			-------
65			None
66
67			"""
68	1		self.loss = loss
69	1		self.l2 = l2
70	1		self.iwe = iwe
71	1		self.smoothing = smoothing
72	1		self.clip = clip
73	1		self.kernel_type = kernel_type
74	1		self.bandwidth = bandwidth
75
76			# Initialize untrained classifiers based on choice of loss function
77	1		if self.loss == 'logistic':
78			# Logistic regression model
79	1		self.clf = LogisticRegression()
80			elif self.loss == 'quadratic':
81			# Least-squares model
82			self.clf = LinearRegression()
83			elif self.loss == 'hinge':
84			# Linear support vector machine
85			self.clf = LinearSVC()
86			else:
87			# Other loss functions are not implemented
88			raise NotImplementedError('Loss function not implemented.')
89
90			# Whether model has been trained
91	1		self.is_trained = False
92
93			# Dimensionality of training data
94	1		self.train_data_dim = ''
95
96	1		def iwe_ratio_gaussians(self, X, Z):
97			"""
98			Estimate importance weights based on a ratio of Gaussian distributions.
99
100			Parameters
101			----------
102			X : array
103			source data (N samples by D features)
104			Z : array
105			target data (M samples by D features)
106
107			Returns
108			-------
109			iw : array
110			importance weights (N samples by 1)
111
112			"""
113			# Data shapes
114	1		N, DX = X.shape
115	1		M, DZ = Z.shape
116
117			# Assert equivalent dimensionalities
118	1		if not DX == DZ:
119			raise ValueError('Dimensionalities of X and Z should be equal.')
120
121			# Sample means in each domain
122	1		mu_X = np.mean(X, axis=0)
123	1		mu_Z = np.mean(Z, axis=0)
124
125			# Sample covariances
126	1		Si_X = np.cov(X.T)
127	1		Si_Z = np.cov(Z.T)
128
129			# Check for positive-definiteness of covariance matrices
130	1		if not (is_pos_def(Si_X) or is_pos_def(Si_Z)):
131			print('Warning: covariate matrices not PSD.')
132
133			regct = -6
134			while not (is_pos_def(Si_X) or is_pos_def(Si_Z)):
135			print('Adding regularization: ' + str(1**regct))
136
137			# Add regularization
138			Si_X += np.eye(DX)*10.**regct
139			Si_Z += np.eye(DZ)*10.**regct
140
141			# Increment regularization counter
142			regct += 1
143
144			# Compute probability of X under each domain
145	1		pT = st.multivariate_normal.pdf(X, mu_Z, Si_Z)
146	1		pS = st.multivariate_normal.pdf(X, mu_X, Si_X)
147
148			# Check for numerical problems
149	1		if np.any(np.isnan(pT)) or np.any(pT == 0):
150			raise ValueError('Source probabilities are NaN or 0.')
151	1		if np.any(np.isnan(pS)) or np.any(pS == 0):
152			raise ValueError('Target probabilities are NaN or 0.')
153
154			# Return the ratio of probabilities
155	1		return pT / pS
156
157	1		def iwe_kernel_densities(self, X, Z):
158			"""
159			Estimate importance weights based on kernel density estimation.
160
161			Parameters
162			----------
163			X : array
164			source data (N samples by D features)
165			Z : array
166			target data (M samples by D features)
167
168			Returns
169			-------
170			array
171			importance weights (N samples by 1)
172
173			"""
174			# Data shapes
175	1		N, DX = X.shape
176	1		M, DZ = Z.shape
177
178			# Assert equivalent dimensionalities
179	1		if not DX == DZ:
180			raise ValueError('Dimensionalities of X and Z should be equal.')
181
182			# Compute probabilities based on source kernel densities
183	1		pT = st.gaussian_kde(Z.T).pdf(X.T)
184	1		pS = st.gaussian_kde(X.T).pdf(X.T)
185
186			# Check for numerical problems
187	1		if np.any(np.isnan(pT)) or np.any(pT == 0):
188			raise ValueError('Source probabilities are NaN or 0.')
189	1		if np.any(np.isnan(pS)) or np.any(pS == 0):
190			raise ValueError('Target probabilities are NaN or 0.')
191
192			# Return the ratio of probabilities
193	1		return pT / pS
194
195	1		def iwe_logistic_discrimination(self, X, Z):
196			"""
197			Estimate importance weights based on logistic regression.
198
199			Parameters
200			----------
201			X : array
202			source data (N samples by D features)
203			Z : array
204			target data (M samples by D features)
205
206			Returns
207			-------
208			array
209			importance weights (N samples by 1)
210
211			"""
212			# Data shapes
213	1		N, DX = X.shape
214	1		M, DZ = Z.shape
215
216			# Assert equivalent dimensionalities
217	1		if not DX == DZ:
218			raise ValueError('Dimensionalities of X and Z should be equal.')
219
220			# Make domain-label variable
221	1		y = np.concatenate((np.zeros((N, 1)),
222			np.ones((M, 1))), axis=0)
223
224			# Concatenate data
225	1		XZ = np.concatenate((X, Z), axis=0)
226
227			# Call a logistic regressor
228	1		lr = LogisticRegression(C=self.l2)
229
230			# Predict probability of belonging to target using cross-validation
231	1		preds = cross_val_predict(lr, XZ, y[:, 0])
232
233			# Return predictions for source samples
234	1		return preds[:N]
235
236	1		def iwe_nearest_neighbours(self, X, Z):
237			"""
238			Estimate importance weights based on nearest-neighbours.
239
240			Parameters
241			----------
242			X : array
243			source data (N samples by D features)
244			Z : array
245			target data (M samples by D features)
246
247			Returns
248			-------
249			iw : array
250			importance weights (N samples by 1)
251
252			"""
253			# Data shapes
254	1		N, DX = X.shape
255	1		M, DZ = Z.shape
256
257			# Assert equivalent dimensionalities
258	1		if not DX == DZ:
259			raise ValueError('Dimensionalities of X and Z should be equal.')
260
261			# Compute Euclidean distance between samples
262	1		d = cdist(X, Z, metric='euclidean')
263
264			# Count target samples within each source Voronoi cell
265	1		ix = np.argmin(d, axis=1)
266	1		iw, _ = np.array(np.histogram(ix, np.arange(N+1)))
267
268			# Laplace smoothing
269	1		if self.smoothing:
270	1		iw = (iw + 1.) / (N + 1)
271
272			# Weight clipping
273	1		if self.clip > 0:
274			iw = np.minimum(self.clip, np.maximum(0, iw))
275
276			# Return weights
277	1		return iw
278
279	1		def iwe_kernel_mean_matching(self, X, Z):
280			"""
281			Estimate importance weights based on kernel mean matching.
282
283			Parameters
284			----------
285			X : array
286			source data (N samples by D features)
287			Z : array
288			target data (M samples by D features)
289
290			Returns
291			-------
292			iw : array
293			importance weights (N samples by 1)
294
295			"""
296			# Data shapes
297	1		N, DX = X.shape
298	1		M, DZ = Z.shape
299
300			# Assert equivalent dimensionalities
301	1		if not DX == DZ:
302			raise ValueError('Dimensionalities of X and Z should be equal.')
303
304			# Compute sample pairwise distances
305	1		KXX = cdist(X, X, metric='euclidean')
306	1		KXZ = cdist(X, Z, metric='euclidean')
307
308			# Check non-negative distances
309	1		if not np.all(KXX >= 0):
310			raise ValueError('Non-positive distance in source kernel.')
311	1		if not np.all(KXZ >= 0):
312			raise ValueError('Non-positive distance in source-target kernel.')
313
314			# Compute kernels
315	1		if self.kernel_type == 'rbf':
316			# Radial basis functions
317	1		KXX = np.exp(-KXX / (2*self.bandwidth**2))
318	1		KXZ = np.exp(-KXZ / (2*self.bandwidth**2))
319
320			# Collapse second kernel and normalize
321	1		KXZ = N/M * np.sum(KXZ, axis=1)
322
323			# Prepare for CVXOPT
324	1		Q = matrix(KXX, tc='d')
325	1		p = matrix(KXZ, tc='d')
326	1		G = matrix(np.concatenate((np.ones((1, N)), -1*np.ones((1, N)),
327			-1.*np.eye(N)), axis=0), tc='d')
328	1		h = matrix(np.concatenate((np.array([N/np.sqrt(N) + N], ndmin=2),
329			np.array([N/np.sqrt(N) - N], ndmin=2),
330			np.zeros((N, 1))), axis=0), tc='d')
331
332			# Call quadratic program solver
333	1		sol = solvers.qp(Q, p, G, h)
334
335			# Return optimal coefficients as importance weights
336	1		return np.array(sol['x'])[:, 0]
337
338	1		def fit(self, X, y, Z):
339			"""
340			Fit/train an importance-weighted classifier.
341
342			Parameters
343			----------
344			X : array
345			source data (N samples by D features)
346			y : array
347			source labels (N samples by 1)
348			Z : array
349			target data (M samples by D features)
350
351			Returns
352			-------
353			None
354
355			"""
356			# Data shapes
357	1		N, DX = X.shape
358	1		M, DZ = Z.shape
359
360			# Assert equivalent dimensionalities
361	1		if not DX == DZ:
362			raise ValueError('Dimensionalities of X and Z should be equal.')
363
364			# Find importance-weights
365	1		if self.iwe == 'lr':
366	1		w = self.iwe_logistic_discrimination(X, Z)
367			elif self.iwe == 'rg':
368			w = self.iwe_ratio_gaussians(X, Z)
369			elif self.iwe == 'nn':
370			w = self.iwe_nearest_neighbours(X, Z)
371			elif self.iwe == 'kde':
372			w = self.iwe_kernel_densities(X, Z)
373			elif self.iwe == 'kmm':
374			w = self.iwe_kernel_mean_matching(X, Z)
375			else:
376			raise NotImplementedError('Estimator not implemented.')
377
378			# Train a weighted classifier
379	1		if self.loss == 'logistic':
380			# Logistic regression model with sample weights
381	1		self.clf.fit(X, y, w)
382			elif self.loss == 'quadratic':
383			# Least-squares model with sample weights
384			self.clf.fit(X, y, w)
385			elif self.loss == 'hinge':
386			# Linear support vector machine with sample weights
387			self.clf.fit(X, y, w)
388			else:
389			# Other loss functions are not implemented
390			raise NotImplementedError('Loss function not implemented.')
391
392			# Mark classifier as trained
393	1		self.is_trained = True
394
395			# Store training data dimensionality
396	1		self.train_data_dim = DX
397
398	1		def predict(self, Z):
399			"""
400			Make predictions on new dataset.
401
402			Parameters
403			----------
404			Z : array
405			new data set (M samples by D features)
406
407			Returns
408			-------
409			preds : array
410			label predictions (M samples by 1)
411
412			"""
413			# Data shape
414	1		M, D = Z.shape
415
416			# If classifier is trained, check for same dimensionality
417	1		if self.is_trained:
418	1		if not self.train_data_dim == D:
419			raise ValueError('''Test data is of different dimensionality
420			than training data.''')
421
422			# Call scikit's predict function
423	1		preds = self.clf.predict(Z)
424
425			# For quadratic loss function, correct predictions
426	1		if self.loss == 'quadratic':
427			preds = (np.sign(preds)+1)/2.
428
429			# Return predictions array
430	1		return preds
431
432	1		def get_params(self):
433			"""Get classifier parameters."""
434			return self.clf.get_params()
435
436	1		def is_trained(self):
437			"""Check whether classifier is trained."""
438			return self.is_trained
439

wmkouw / libTLDA

Push — master ( f3d068...88fa67 )

ImportanceWeightedClassifier B

Complexity

Size/Duplication

Test Coverage

Importance

10 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complex Class

Duplication Side-by-Side

Filter issues like