Passed
Push — master (61a8e6...a7d091) by Simon, created 03:21

tests.test_packages (Rating: A)

Complexity

    Total Complexity:   6

Size/Duplication

    Total Lines:        190
    Duplicated Lines:   26.32 %

Importance

    Changes:            0

Metric   Value
wmc      6
eloc     111
dl       50
loc      190
rs       10
c        0
b        0
f        0

6 Functions

Rating   Name                Duplication   Size   Complexity
A        test_meta_learn()   25            25     1
A        test_keras()        0             45     1
A        test_xgboost()      0             15     1
A        test_sklearn()      25            25     1
A        test_catboost()     0             23     1
A        test_lightgbm()     0             20     1

How to fix: Duplicated Code

Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.

In this file, the flagged duplication is between test_meta_learn() and test_sklearn(), which share an identical objective function and search space (25 duplicated lines each, 50 in total).
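One way to remove it is to extract the shared objective function and search space into a single helper that both tests call. A minimal sketch, assuming the module-level imports (cross_val_score, Hyperactive) and the X, y data from the listing below; the helper name _run_dtc_search and its keyword passthrough are illustrative, not part of the project:

def _run_dtc_search(X, y, **hyperactive_kwargs):
    # Hypothetical helper: holds the DecisionTreeClassifier objective and
    # search space that test_meta_learn() and test_sklearn() currently copy.
    from sklearn.tree import DecisionTreeClassifier

    def model(para, X_train, y_train):
        dtc = DecisionTreeClassifier(
            criterion=para["criterion"],
            max_depth=para["max_depth"],
            min_samples_split=para["min_samples_split"],
            min_samples_leaf=para["min_samples_leaf"],
        )
        return cross_val_score(dtc, X_train, y_train, cv=3).mean()

    search_config = {
        model: {
            "criterion": ["gini", "entropy"],
            "max_depth": range(1, 21),
            "min_samples_split": range(2, 21),
            "min_samples_leaf": range(1, 21),
        }
    }
    Hyperactive(search_config, **hyperactive_kwargs).search(X, y)


def test_meta_learn():
    _run_dtc_search(X, y, meta_learn=True)


def test_sklearn():
    _run_dtc_search(X, y)

The two tests would then differ only in the meta_learn flag. The full flagged listing follows: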

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License

from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import cross_val_score
from hyperactive import Hyperactive

data = load_breast_cancer()
X, y = data.data, data.target


# View Code Duplication: this block is duplicated in the project (see test_sklearn).
def test_meta_learn():
    from sklearn.tree import DecisionTreeClassifier

    def model(para, X_train, y_train):
        model = DecisionTreeClassifier(
            criterion=para["criterion"],
            max_depth=para["max_depth"],
            min_samples_split=para["min_samples_split"],
            min_samples_leaf=para["min_samples_leaf"],
        )
        scores = cross_val_score(model, X_train, y_train, cv=3)

        return scores.mean()

    search_config = {
        model: {
            "criterion": ["gini", "entropy"],
            "max_depth": range(1, 21),
            "min_samples_split": range(2, 21),
            "min_samples_leaf": range(1, 21),
        }
    }

    opt = Hyperactive(search_config, meta_learn=True)
    opt.search(X, y)


# View Code Duplication: this block duplicates test_meta_learn above.
def test_sklearn():
    from sklearn.tree import DecisionTreeClassifier

    def model(para, X_train, y_train):
        model = DecisionTreeClassifier(
            criterion=para["criterion"],
            max_depth=para["max_depth"],
            min_samples_split=para["min_samples_split"],
            min_samples_leaf=para["min_samples_leaf"],
        )
        scores = cross_val_score(model, X_train, y_train, cv=3)

        return scores.mean()

    search_config = {
        model: {
            "criterion": ["gini", "entropy"],
            "max_depth": range(1, 21),
            "min_samples_split": range(2, 21),
            "min_samples_leaf": range(1, 21),
        }
    }

    opt = Hyperactive(search_config)
    opt.search(X, y)
    # opt.predict(X)
    # opt.score(X, y)


def test_xgboost():
    from xgboost import XGBClassifier

    def model(para, X_train, y_train):
        model = XGBClassifier(
            n_estimators=para["n_estimators"], max_depth=para["max_depth"]
        )
        scores = cross_val_score(model, X_train, y_train, cv=3)

        return scores.mean()

    search_config = {model: {"n_estimators": range(2, 20), "max_depth": range(1, 11)}}

    opt = Hyperactive(search_config)
    opt.search(X, y)
    # opt.predict(X)
    # opt.score(X, y)


def test_lightgbm():
    from lightgbm import LGBMClassifier

    def model(para, X_train, y_train):
        model = LGBMClassifier(
            num_leaves=para["num_leaves"], learning_rate=para["learning_rate"]
        )
        scores = cross_val_score(model, X_train, y_train, cv=3)

        return scores.mean()

    search_config = {
        model: {
            "num_leaves": range(2, 20),
            "learning_rate": [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1],
        }
    }

    opt = Hyperactive(search_config)
    opt.search(X, y)
    # opt.predict(X)
    # opt.score(X, y)


def test_catboost():
    from catboost import CatBoostClassifier

    def model(para, X_train, y_train):
        model = CatBoostClassifier(
            iterations=para["iterations"],
            depth=para["depth"],
            learning_rate=para["learning_rate"],
        )
        scores = cross_val_score(model, X_train, y_train, cv=3)

        return scores.mean()

    search_config = {
        model: {
            "iterations": [1],
            "depth": range(2, 10),
            "learning_rate": [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1],
        }
    }

    opt = Hyperactive(search_config)
    opt.search(X, y)
    # opt.predict(X)
    # opt.score(X, y)


def test_keras():
    from keras.models import Sequential
    from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
    from keras.datasets import cifar10
    from keras.utils import to_categorical

    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    # use only 1000 samples to keep the test fast
    X_train = X_train[0:1000]
    y_train = y_train[0:1000]

    X_test = X_test[0:1000]
    y_test = y_test[0:1000]

    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)

    def cnn(para, X_train, y_train):
        model = Sequential()

        model.add(
            Conv2D(
                filters=para["filters.0"],
                kernel_size=para["kernel_size.0"],
                activation="relu",
            )
        )
        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Flatten())
        model.add(Dense(10, activation="softmax"))

        model.compile(
            optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
        )
        model.fit(X_train, y_train, epochs=1)

        loss, score = model.evaluate(x=X_test, y=y_test)

        return score

    search_config = {cnn: {"filters.0": [32, 64], "kernel_size.0": [3, 4]}}

    opt = Hyperactive(search_config)
    opt.search(X_train, y_train)
    # opt.predict(X)
    # opt.score(X, y)


"""
190
def test_pytorch():
191
    import torch
192
    import torchvision
193
    import torchvision.transforms as transforms
194
    import torch.nn as nn
195
    import torch.nn.functional as F
196
    import torch.optim as optim
197
198
    transform = transforms.Compose(
199
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
200
    )
201
202
    trainset = torchvision.datasets.CIFAR10(
203
        root="./data", train=True, download=True, transform=transform
204
    )
205
206
    def cnn(para, X_train, y_train):
207
        class Net(nn.Module):
208
            def __init__(self):
209
                super(Net, self).__init__()
210
                self.conv1 = nn.Conv2d(3, 6, 5)
211
                self.pool = nn.MaxPool2d(2, 2)
212
                self.conv2 = nn.Conv2d(6, 16, 5)
213
                self.fc1 = nn.Linear(16 * 5 * 5, 120)
214
                self.fc2 = nn.Linear(120, 84)
215
                self.fc3 = nn.Linear(84, 10)
216
217
            def forward(self, x):
218
                x = self.pool(F.relu(self.conv1(x)))
219
                x = self.pool(F.relu(self.conv2(x)))
220
                x = x.view(-1, 16 * 5 * 5)
221
                x = F.relu(self.fc1(x))
222
                x = F.relu(self.fc2(x))
223
                x = self.fc3(x)
224
                return x
225
226
        trainloader = torch.utils.data.DataLoader(
227
            trainset, batch_size=4, shuffle=True, num_workers=2
228
        )
229
230
        net = Net()
231
232
        criterion = nn.CrossEntropyLoss()
233
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
234
235
        for epoch in range(1):  # loop over the dataset multiple times
236
237
            running_loss = 0.0
238
            for i, data in enumerate(trainloader, 0):
239
                # get the inputs; data is a list of [inputs, labels]
240
                inputs, labels = data
241
242
                # zero the parameter gradients
243
                optimizer.zero_grad()
244
245
                # forward + backward + optimize
246
                outputs = net(inputs)
247
                loss = criterion(outputs, labels)
248
                loss.backward()
249
                optimizer.step()
250
251
                # print statistics
252
                running_loss += loss.item()
253
                if i % 2000 == 1999:  # print every 2000 mini-batches
254
                    print(
255
                        "[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 2000)
256
                    )
257
                    running_loss = 0.0
258
259
        return running_loss
260
261
    search_config = {cnn: {"filters.0": [32, 64], "kernel_size.0": [3, 4]}}
262
263
    opt = Hyperactive(search_config)
264
    opt.search(None, None)
265
    # opt.predict(X)
266
    # opt.score(X, y)
267
268
269
270
def test_chainer():
271
    def cnn(para, X_train, y_train):
272
        pass
273
274
    search_config = {cnn: {"filters.0": [32, 64], "kernel_size.0": [3, 4]}}
275
    opt = Hyperactive(search_config)
276
    opt.search(None, None)
277
    # opt.predict(X)
278
    # opt.score(X, y)
279
"""
280