|
1
|
|
|
""" |
|
2
|
|
|
This example shows how to select the best features for a model |
|
3
|
|
|
and dataset. |
|
4
|
|
|
|
|
5
|
|
|
The diabetes dataset has 10 features, therefore we have 10 search space |
|
6
|
|
|
dimensions for the feature selection. |
|
7
|
|
|
|
|
8
|
|
|
The function "get_feature_indices" returns the list of features that |
|
9
|
|
|
were selected. This can be used to select the subset of features in "x_new". |
|
10
|
|
|
""" |
|
11
|
|
|
|
|
12
|
|
|
import numpy as np |
|
13
|
|
|
import itertools |
|
14
|
|
|
from sklearn.datasets import load_diabetes |
|
15
|
|
|
from sklearn.model_selection import cross_val_score |
|
16
|
|
|
from sklearn.neighbors import KNeighborsRegressor |
|
17
|
|
|
from hyperactive import Hyperactive |
|
18
|
|
|
from hyperactive.optimizers import EvolutionStrategyOptimizer |
|
19
|
|
|
|
|
20
|
|
|
|
|
21
|
|
|
data = load_diabetes() |
|
22
|
|
|
X, y = data.data, data.target |
|
23
|
|
|
|
|
24
|
|
|
|
|
25
|
|
|
# helper function that returns the selected training data features by index
def get_feature_indices(opt):
    """Return the indices of the features switched on in *opt*.

    Keys of the form ``"feature.<n>"`` with a non-zero value select
    feature ``n``; all other keys (e.g. ``"n_neighbors"``) are ignored.
    """
    return [
        int(name.rsplit(".", 1)[1])
        for name, flag in opt.items()
        if "feature" in name and flag != 0
    ]
|
38
|
|
|
|
|
39
|
|
|
|
|
40
|
|
|
def model(opt):
    """Objective function: mean 5-fold CV score of a KNN regressor
    trained on the feature subset selected in *opt*.

    Returns 0 (the floor score) when no feature is selected, since the
    estimator cannot be fit on an empty feature matrix.
    """
    chosen = get_feature_indices(opt)
    if not chosen:
        return 0

    # drop any None entries before numpy column indexing
    columns = [c for c in chosen if c is not None]
    x_new = X[:, columns]

    estimator = KNeighborsRegressor(n_neighbors=opt["n_neighbors"])
    return cross_val_score(estimator, x_new, y, cv=5).mean()
|
53
|
|
|
|
|
54
|
|
|
|
|
55
|
|
|
# each feature is used for training (1) or not used for training (0)
search_space = {
    "n_neighbors": list(range(1, 100)),
    # one on/off dimension per diabetes feature: "feature.0" .. "feature.9"
    **{f"feature.{i}": [1, 0] for i in range(10)},
}
|
69
|
|
|
|
|
70
|
|
|
|
|
71
|
|
|
optimizer = EvolutionStrategyOptimizer(rand_rest_p=0.20) |
|
72
|
|
|
|
|
73
|
|
|
hyper = Hyperactive() |
|
74
|
|
|
hyper.add_search( |
|
75
|
|
|
model, |
|
76
|
|
|
search_space, |
|
77
|
|
|
n_iter=200, |
|
78
|
|
|
initialize={"random": 15}, |
|
79
|
|
|
optimizer=optimizer, |
|
80
|
|
|
) |
|
81
|
|
|
hyper.run() |
|
82
|
|
|
|