1
|
|
|
""" |
2
|
|
|
This example shows how to select the best features for a model |
3
|
|
|
and dataset. |
4
|
|
|
|
5
|
|
|
The boston dataset has 13 features, therefore we have 13 search space |
6
|
|
|
dimensions for the feature selection. |
7
|
|
|
|
8
|
|
|
The function "get_feature_indices" returns the list of features that |
9
|
|
|
were selected. This can be used to select the subset of features in "x_new".
10
|
|
|
""" |
11
|
|
|
|
12
|
|
|
import numpy as np |
13
|
|
|
import itertools |
14
|
|
|
from sklearn.datasets import load_boston |
15
|
|
|
from sklearn.model_selection import cross_val_score |
16
|
|
|
from sklearn.neighbors import KNeighborsRegressor |
17
|
|
|
from hyperactive import Hyperactive, EvolutionStrategyOptimizer |
18
|
|
|
|
19
|
|
|
# Load the Boston housing dataset: X has 13 feature columns, y is the
# regression target (median house value).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — this example requires an older scikit-learn version to run.
data = load_boston()
X, y = data.data, data.target
21
|
|
|
|
22
|
|
|
|
23
|
|
|
# helper function that returns the selected training data features by index
def get_feature_indices(opt):
    """Return the column indices of all enabled "feature.<n>" entries in *opt*.

    An entry counts as selected when its key contains "feature" and its
    value is anything other than ``False``.
    """
    return [
        int(name.rsplit(".", 1)[1])
        for name, enabled in opt.items()
        if "feature" in name and enabled is not False
    ]
36
|
|
|
|
37
|
|
|
|
38
|
|
|
def model(opt):
    """Objective function: mean 5-fold CV score of a KNN regressor trained on
    the subset of features selected in *opt*.

    Returns 0 when no feature is selected, so the optimizer can still score
    such a candidate instead of crashing on an empty feature matrix.
    """
    feature_indices = get_feature_indices(opt)
    if not feature_indices:
        return 0

    # get_feature_indices only ever yields ints, so the list can be used
    # directly as a column selection. (The original filtered out None values
    # here, a condition that could never trigger — dead code removed.)
    x_new = X[:, feature_indices]

    knr = KNeighborsRegressor(n_neighbors=opt["n_neighbors"])
    scores = cross_val_score(knr, x_new, y, cv=5)

    return scores.mean()
51
|
|
|
|
52
|
|
|
|
53
|
|
|
# each feature is used for training (True) or not used for training (False);
# the 13 boolean flags are generated instead of written out by hand
search_space = {
    "n_neighbors": list(range(1, 100)),
    **{f"feature.{i}": [True, False] for i in range(13)},
}
70
|
|
|
|
71
|
|
|
|
72
|
|
|
# Evolution-strategy optimizer; rand_rest_p=0.20 presumably sets a 20% chance
# of a random restart per iteration — confirm against the hyperactive docs.
optimizer = EvolutionStrategyOptimizer(rand_rest_p=0.20)

hyper = Hyperactive()
hyper.add_search(
    model,                      # objective function defined above
    search_space,               # n_neighbors + 13 boolean feature flags
    n_iter=200,                 # number of optimization iterations
    initialize={"random": 15},  # start from 15 random positions
    optimizer=optimizer,
)
hyper.run()  # executes the search and reports the best parameters found
83
|
|
|
|