tpe_sampler_example - Code Metrics - Inspection of "[ENH] `optuna` optimizer interface (#155)" - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( c241e4...b050e9 )

by Simon

created 2025-08-16 16:30 UTC

tpe_sampler_example A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	101
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	1
eloc	39
dl	0
loc	101
rs	10
c	0
b	0
f	0

1 Function

Rating	Name	Duplication	Size	Complexity
A	main()	0	72	1

"""
TPESampler Example - Tree-structured Parzen Estimator

The TPESampler is Optuna's default and most popular Bayesian optimization algorithm.
It uses a Tree-structured Parzen Estimator to model the relationship between
hyperparameters and objective values, making it efficient at finding optimal regions.

Characteristics:
- Bayesian optimization approach
- Good balance of exploration vs exploitation
- Works well with mixed parameter types (continuous, discrete, categorical)
- Efficient for moderate-dimensional problems
- Default choice for most hyperparameter optimization tasks
"""

import numpy as np
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

from hyperactive.experiment.integrations import SklearnCvExperiment
from hyperactive.opt.optuna import TPESampler


def main():
    """Tune a random forest on the wine dataset with ``TPESampler``.

    Wraps a ``RandomForestClassifier`` in a 3-fold ``SklearnCvExperiment``,
    searches a mixed integer/categorical parameter space for 50 trials
    (seeded with one warm-start point), prints the outcome and returns it.

    Returns
    -------
    tuple
        ``(best_params, best_score)``: the value returned by
        ``optimizer.run()`` and the optimizer's ``best_score_`` attribute.
    """
    # --- Data -------------------------------------------------------------
    X, y = load_wine(return_X_y=True)
    print(f"Dataset: Wine classification ({X.shape[0]} samples, {X.shape[1]} features)")

    # --- Experiment: cross-validated random forest --------------------------
    experiment = SklearnCvExperiment(
        estimator=RandomForestClassifier(random_state=42),
        X=X,
        y=y,
        cv=3,
    )

    # --- Search space -------------------------------------------------------
    # Tuples give (low, high) integer ranges; lists enumerate categorical
    # choices. Key order is kept identical to the warm-start point below.
    search_space = dict(
        n_estimators=(10, 200),
        max_depth=(1, 20),
        min_samples_split=(2, 20),
        min_samples_leaf=(1, 10),
        max_features=["sqrt", "log2", None],
        bootstrap=[True, False],
    )

    # --- Warm start: one hand-picked configuration to seed the search -------
    seed_point = dict(
        n_estimators=100,
        max_depth=10,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features="sqrt",
        bootstrap=True,
    )

    # --- Optimizer ----------------------------------------------------------
    sampler_settings = {
        "param_space": search_space,
        "n_trials": 50,
        "random_state": 42,
        "initialize": {"warm_start": [seed_point]},
        "experiment": experiment,
        # Number of random trials before the TPE model takes over
        "n_startup_trials": 10,
        # Number of candidates scored for expected improvement per step
        "n_ei_candidates": 24,
    }
    optimizer = TPESampler(**sampler_settings)

    # --- Run ----------------------------------------------------------------
    # After the startup phase, TPE proposes new points from models of the
    # well- and poorly-performing regions seen in earlier trials.
    best_params = optimizer.run()

    # --- Report -------------------------------------------------------------
    print("\n=== Results ===")
    print(f"Best parameters: {best_params}")
    print(f"Best score: {optimizer.best_score_:.4f}")
    print()

    outcome = (best_params, optimizer.best_score_)
    return outcome


if __name__ == "__main__":
    # Run the example only when executed as a script (not on import) and keep
    # the returned pair available as module-level names.
    best_params, best_score = main()


1			"""
2			TPESampler Example - Tree-structured Parzen Estimator
3
4			The TPESampler is Optuna's default and most popular Bayesian optimization algorithm.
5			It uses a Tree-structured Parzen Estimator to model the relationship between
6			hyperparameters and objective values, making it efficient at finding optimal regions.
7
8			Characteristics:
9			- Bayesian optimization approach
10			- Good balance of exploration vs exploitation
11			- Works well with mixed parameter types (continuous, discrete, categorical)
12			- Efficient for moderate-dimensional problems
13			- Default choice for most hyperparameter optimization tasks
14			"""
15
16			import numpy as np
17			from sklearn.datasets import load_wine
18			from sklearn.ensemble import RandomForestClassifier
19			from sklearn.model_selection import cross_val_score
20
21			from hyperactive.experiment.integrations import SklearnCvExperiment
22			from hyperactive.opt.optuna import TPESampler
23
24
25			def main():
26			# === TPESampler Example ===
27			# Tree-structured Parzen Estimator - Bayesian Optimization
28
29			# Load dataset
30			X, y = load_wine(return_X_y=True)
31			print(f"Dataset: Wine classification ({X.shape[0]} samples, {X.shape[1]} features)")
32
33			# Create experiment
34			estimator = RandomForestClassifier(random_state=42)
35			experiment = SklearnCvExperiment(estimator=estimator, X=X, y=y, cv=3)
36
37			# Define search space
38			param_space = {
39			"n_estimators": (10, 200), # Continuous integer
40			"max_depth": (1, 20), # Continuous integer
41			"min_samples_split": (2, 20), # Continuous integer
42			"min_samples_leaf": (1, 10), # Continuous integer
43			"max_features": ["sqrt", "log2", None], # Categorical
44			"bootstrap": [True, False], # Categorical boolean
45			}
46
47			# Search Space:
48			# for param, space in param_space.items():
49			# print(f" {param}: {space}")
50
51			# Configure TPESampler with warm start
52			warm_start_points = [
53			{"n_estimators": 100, "max_depth": 10, "min_samples_split": 2,
54			"min_samples_leaf": 1, "max_features": "sqrt", "bootstrap": True}
55			]
56
57			optimizer = TPESampler(
58			param_space=param_space,
59			n_trials=50,
60			random_state=42,
61			initialize={"warm_start": warm_start_points},
62			experiment=experiment,
63			n_startup_trials=10, # Random trials before TPE kicks in
64			n_ei_candidates=24 # Number of candidates for expected improvement
65			)
66
67			# TPESampler Configuration:
68			# n_trials: configured above
69			# n_startup_trials: random exploration phase
70			# n_ei_candidates: number of expected improvement candidates
71			# warm_start: initial point(s) provided
72
73			# Run optimization
74			# Running optimization...
75			best_params = optimizer.run()
76
77			# Results
78			print("\n=== Results ===")
79			print(f"Best parameters: {best_params}")
80			print(f"Best score: {optimizer.best_score_:.4f}")
81			print()
82
83			# TPE Behavior Analysis:
84			# - First 10 trials: Random exploration (n_startup_trials)
85			# - Trials 11-50: TPE-guided exploration based on past results
86			# - TPE builds probabilistic models of good vs bad parameter regions
87			# - Balances exploration of uncertain areas with exploitation of promising regions
88
89			# Parameter Space Exploration:
90			# TPESampler effectively explores the joint parameter space by:
91			# 1. Modeling P(x|y) - probability of parameters given objective values
92			# 2. Using separate models for 'good' and 'bad' performing regions
93			# 3. Selecting next points to maximize expected improvement
94			# 4. Handling mixed parameter types (continuous, discrete, categorical)
95
96			return best_params, optimizer.best_score_
97
98
99			if __name__ == "__main__":
100			best_params, best_score = main()
101

SimonBlanke / Hyperactive

Push — master ( c241e4...b050e9 )

tpe_sampler_example A

Complexity

Size/Duplication

Importance

1 Function

Duplication Side-by-Side

Filter issues like