Completed — push to master (5890d9...97637f) by Hugo at 04:35

__init__() — rated A

Complexity: 1 condition · Size: 5 total lines · Duplication: 0 lines (ratio 0 %)

Metric   Value
cc       1
dl       0
loc      5
rs       9.4286
import numpy as np
Issue (Configuration): the import numpy could not be resolved.

This can be caused by one of the following:

1. Missing dependencies

This error can indicate a Pylint configuration issue. Make sure your libraries are available by adding the necessary install commands:

# .scrutinizer.yml
before_commands:
    - sudo pip install abc   # Python 2
    - sudo pip3 install abc  # Python 3

Tip: pylint is currently not run inside a virtualenv, so when installing your modules make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error can also result from missing __init__.py files in your module folders. Make sure to place one in each sub-folder.
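For the second cause, the expected layout looks like this (hypothetical project names, for illustration only):

    myproject/
        __init__.py
        regression/
            __init__.py
            linear_bfgs.py

Every folder that should be importable as a package carries its own __init__.py.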
from scipy.optimize import fmin_l_bfgs_b
Issue (Configuration): the import scipy.optimize could not be resolved (same causes as for the numpy import above).
from Orange.regression import Learner, Model
from Orange.preprocess import Normalize, Continuize, Impute, RemoveNaNColumns

__all__ = ["LinearRegressionLearner"]


class LinearRegressionLearner(Learner):
    '''L2 regularized linear regression (a.k.a. Ridge regression)

    This model uses the L-BFGS algorithm to minimize the linear least
    squares penalty with L2 regularization. When using this model you
    should:

    - Choose a suitable regularization parameter lambda_
    - Consider appending a column of ones to the dataset (intercept term)

    Parameters
    ----------
    lambda\_ : float, optional (default=1.0)
        Regularization parameter. It controls the trade-off between fitting
        the data and keeping the parameters small. Higher values of lambda\_
        force the parameters to be smaller.

    preprocessors : list, optional
            (default=[Normalize(), Continuize(), Impute(), RemoveNaNColumns()])
        Preprocessors are applied to data before training or testing. The
        default preprocessors:

        - transform the dataset so that the columns are on a similar scale,
        - continuize all discrete attributes,
        - remove columns in which all values are NaN,
        - replace NaN values with suitable values.

    fmin_args : dict, optional
        Parameters passed to the L-BFGS algorithm.

    Examples
    --------

        import numpy as np
        from Orange.data import Table
        from Orange.regression.linear_bfgs import LinearRegressionLearner

        data = Table('housing')
        data.X = np.hstack((data.X, np.ones((data.X.shape[0], 1))))  # append ones
        m = LinearRegressionLearner(lambda_=1.0)
        c = m(data)     # fit
        print(c(data))  # predict
    '''
    name = 'linear_bfgs'
    preprocessors = [Normalize(),
                     Continuize(),
                     Impute(),
                     RemoveNaNColumns()]

    def __init__(self, lambda_=1.0, preprocessors=None, **fmin_args):
Issue (Comprehensibility): lambda_ re-defines a name that is already available in the outer scope (the module-level for lambda_ in ... loop in the __main__ block at the bottom of the file).

It is generally bad practice to shadow variables from the outer scope; in most cases this is done unintentionally and can lead to unexpected behavior:

param = 5

class Foo:
    def __init__(self, param):   # "param" would be flagged here
        self.param = param
        super().__init__(preprocessors=preprocessors)
        self.lambda_ = lambda_
        self.fmin_args = fmin_args

    def cost_grad(self, theta, X, y):
        # Residuals of the current fit.
        t = X.dot(theta) - y

        # L2-regularized least-squares cost, scaled by the number of rows.
        cost = t.dot(t)
        cost += self.lambda_ * theta.dot(theta)
        cost /= 2.0 * X.shape[0]

        # Gradient of the cost with respect to theta.
        grad = X.T.dot(t)
        grad += self.lambda_ * theta
        grad /= X.shape[0]

        return cost, grad
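For reference, cost_grad implements the following objective and gradient (with N = X.shape[0]):

    J(\theta) = \frac{1}{2N}\left(\lVert X\theta - y\rVert^2 + \lambda \lVert\theta\rVert^2\right),
    \qquad
    \nabla J(\theta) = \frac{1}{N}\left(X^{\top}(X\theta - y) + \lambda\theta\right)

Note that lambda_ penalizes every component of theta, including the coefficient of an appended column of ones, so the intercept is shrunk along with the other parameters.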
    def fit(self, X, Y, W):
Issue (Bug, best practice): the signature differs from the overridden fit method.

It is good practice to use signatures compatible with the Liskov substitution principle, so that instances of the child class can be passed anywhere an instance of the super-class/interface would be acceptable.
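Assuming the base Learner declares the sample weights as optional, i.e. fit(self, X, Y, W=None) (an assumption about Orange's base class, which is not shown in this file), giving W a matching default would restore substitutability:

    def fit(self, X, Y, W=None):
        ...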
        if len(Y.shape) > 1 and Y.shape[1] > 1:
            raise ValueError('Linear regression does not support '
                             'multi-target classification')

        if np.isnan(np.sum(X)) or np.isnan(np.sum(Y)):
            raise ValueError('Linear regression does not support '
                             'unknown values')

        theta = np.zeros(X.shape[1])
        theta, cost, ret = fmin_l_bfgs_b(self.cost_grad, theta,
                                         args=(X, Y.ravel()), **self.fmin_args)

Issue (Unused code): the variables cost and ret appear to be unused.

        return LinearRegressionModel(theta)
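Since the objective is quadratic, the L-BFGS solution can be sanity-checked against the closed-form ridge solution theta* = (X'X + lambda I)^(-1) X'y. A minimal self-contained sketch (synthetic data and standalone names, not part of this file):

    import numpy as np
    from scipy.optimize import fmin_l_bfgs_b

    rng = np.random.RandomState(0)
    X = rng.randn(100, 3)
    y = X.dot(np.array([1.0, -2.0, 0.5])) + 0.01 * rng.randn(100)
    lambda_ = 1.0

    def cost_grad(theta, X, y):
        # Same objective and gradient as LinearRegressionLearner.cost_grad above.
        t = X.dot(theta) - y
        cost = (t.dot(t) + lambda_ * theta.dot(theta)) / (2.0 * X.shape[0])
        grad = (X.T.dot(t) + lambda_ * theta) / X.shape[0]
        return cost, grad

    theta_bfgs, _, _ = fmin_l_bfgs_b(cost_grad, np.zeros(3), args=(X, y))
    theta_closed = np.linalg.solve(X.T.dot(X) + lambda_ * np.eye(3), X.T.dot(y))
    print(np.allclose(theta_bfgs, theta_closed, atol=1e-4))  # expect True
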
class LinearRegressionModel(Model):
    def __init__(self, theta):
Issue (Bug): the __init__ method of the super-class ModelRegression is not called.

It is generally advisable to initialize the super-class by calling its __init__ method:

class SomeParent:
    def __init__(self):
        self.x = 1

class SomeChild(SomeParent):
    def __init__(self):
        # Initialize the super class
        SomeParent.__init__(self)
        self.theta = theta

    def predict(self, X):
        return X.dot(self.theta)


if __name__ == '__main__':
    import Orange.data
    import sklearn.cross_validation as skl_cross_validation
Issue (Configuration): the import sklearn.cross_validation could not be resolved (same causes as for the numpy import above).
Issue (Coding style): trailing whitespace on the otherwise blank line after the imports.

    np.random.seed(42)

    def numerical_grad(f, params, e=1e-4):
Issue (Duplication): this code seems to be duplicated in the project. If the same code is needed in three or more places, consider extracting it into a single function or class; more detailed suggestions are in the "Code" section of the repository.
        # Central-difference approximation of the gradient of f at params.
        grad = np.zeros_like(params)
        perturb = np.zeros_like(params)
        for i in range(params.size):
            perturb[i] = e
            j1 = f(params - perturb)
            j2 = f(params + perturb)
            grad[i] = (j2 - j1) / (2.0 * e)
            perturb[i] = 0
        return grad
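numerical_grad is the standard central-difference check: each gradient component is approximated as

    \frac{\partial f}{\partial \theta_i} \approx \frac{f(\theta + e\,e_i) - f(\theta - e\,e_i)}{2e}

where e_i is the i-th unit vector; the approximation error is O(e^2). The commented-out gradient check below uses it to validate cost_grad.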
    d = Orange.data.Table('housing')
    d.X = np.hstack((d.X, np.ones((d.X.shape[0], 1))))
    d.shuffle()

#    m = LinearRegressionLearner(lambda_=1.0)
#    print(m(d)(d))

#    # gradient check
#    m = LinearRegressionLearner(lambda_=1.0)
#    theta = np.random.randn(d.X.shape[1])
#
#    ga = m.cost_grad(theta, d.X, d.Y.ravel())[1]
#    gm = numerical_grad(lambda t: m.cost_grad(t, d.X, d.Y.ravel())[0], theta)
#
#    print(np.sum((ga - gm)**2))

    for lambda_ in (0.01, 0.03, 0.1, 0.3, 1, 3):
        m = LinearRegressionLearner(lambda_=lambda_)
        scores = []
        for tr_ind, te_ind in skl_cross_validation.KFold(d.X.shape[0]):
            # Mean squared error on the held-out fold.
            s = np.mean((m(d[tr_ind])(d[te_ind]) - d[te_ind].Y.ravel())**2)
            scores.append(s)
        print('{:5.2f} {}'.format(lambda_, np.mean(scores)))

    m = LinearRegressionLearner(lambda_=0)
    print('test data', np.mean((m(d)(d) - d.Y.ravel())**2))
    print('majority', np.mean((np.mean(d.Y.ravel()) - d.Y.ravel())**2))
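A note on the cross-validation import: sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20, which is also why Pylint cannot resolve it against a recent scikit-learn. Under the modern API the equivalent loop would be (a sketch; the old KFold defaulted to three folds):

    from sklearn.model_selection import KFold

    for tr_ind, te_ind in KFold(n_splits=3).split(d.X):
        ...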