Passed — Push to master (c241e4...b050e9) by Simon, created 01:57

tpe_sampler_example — Rating: A

Complexity
  Total Complexity: 1

Size/Duplication
  Total Lines: 101
  Duplicated Lines: 0%

Importance
  Changes: 0

Metric   Value
wmc      1
eloc     39
dl       0
loc      101
rs       10
c        0
b        0
f        0

1 Function

Rating   Name     Duplication   Size   Complexity
A        main()   0             72     1

"""
TPESampler Example - Tree-structured Parzen Estimator

The TPESampler is Optuna's default and most popular Bayesian optimization
algorithm. It uses a Tree-structured Parzen Estimator to model the
relationship between hyperparameters and objective values, making it
efficient at finding optimal regions of the search space.

Characteristics:
- Bayesian optimization approach
- Good balance of exploration vs. exploitation
- Works well with mixed parameter types (continuous, discrete, categorical)
- Efficient for moderate-dimensional problems
- Default choice for most hyperparameter optimization tasks
"""

from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier

from hyperactive.experiment.integrations import SklearnCvExperiment
from hyperactive.opt.optuna import TPESampler


def main():
    # === TPESampler Example ===
    # Tree-structured Parzen Estimator - Bayesian optimization

    # Load dataset
    X, y = load_wine(return_X_y=True)
    print(f"Dataset: Wine classification ({X.shape[0]} samples, {X.shape[1]} features)")

    # Create experiment
    estimator = RandomForestClassifier(random_state=42)
    experiment = SklearnCvExperiment(estimator=estimator, X=X, y=y, cv=3)

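    # Note (an assumption about the integration, not verified here):
    # SklearnCvExperiment presumably scores each candidate configuration
    # via cross-validation, in the spirit of sklearn's cross_val_score.
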
    # Define search space: (low, high) tuples are numeric ranges,
    # lists are categorical choices
    param_space = {
        "n_estimators": (10, 200),               # integer range
        "max_depth": (1, 20),                    # integer range
        "min_samples_split": (2, 20),            # integer range
        "min_samples_leaf": (1, 10),             # integer range
        "max_features": ["sqrt", "log2", None],  # categorical
        "bootstrap": [True, False],              # categorical boolean
    }

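    # Assumption (not verified against the integration's internals): integer
    # tuple ranges such as (10, 200) are presumably forwarded to Optuna as
    # integer suggestions (trial.suggest_int), and lists as categorical
    # choices (trial.suggest_categorical).
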
    # Uncomment to print the search space:
    # for param, space in param_space.items():
    #     print(f"  {param}: {space}")

    # Configure TPESampler with warm start
    warm_start_points = [
        {"n_estimators": 100, "max_depth": 10, "min_samples_split": 2,
         "min_samples_leaf": 1, "max_features": "sqrt", "bootstrap": True}
    ]

    optimizer = TPESampler(
        param_space=param_space,
        n_trials=50,
        random_state=42,
        initialize={"warm_start": warm_start_points},
        experiment=experiment,
        n_startup_trials=10,  # random trials before TPE kicks in
        n_ei_candidates=24,   # candidates scored by expected improvement
    )

    # TPESampler configuration summary:
    #   n_trials:         total optimization budget (50)
    #   n_startup_trials: length of the random exploration phase (10)
    #   n_ei_candidates:  candidates scored per TPE step (24)
    #   warm_start:       one initial point supplied above

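    # Aside (a hedged sketch, not executed here): with Optuna directly,
    # roughly equivalent sampler settings would look like
    #
    #   import optuna
    #   sampler = optuna.samplers.TPESampler(
    #       n_startup_trials=10, n_ei_candidates=24, seed=42)
    #   study = optuna.create_study(direction="maximize", sampler=sampler)
    #   study.optimize(objective, n_trials=50)  # 'objective' is hypothetical
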
    # Run optimization
    best_params = optimizer.run()

    # Results
    print("\n=== Results ===")
    print(f"Best parameters: {best_params}")
    print(f"Best score: {optimizer.best_score_:.4f}")
    print()

    # TPE behavior in this run:
    # - Trials 1-10:  random exploration (n_startup_trials)
    # - Trials 11-50: TPE-guided sampling based on past results
    # - TPE builds probabilistic models of good vs. bad parameter regions
    # - It balances exploring uncertain areas with exploiting promising ones

    # Parameter space exploration: TPESampler explores the joint space by
    # 1. modeling P(x|y), the probability of parameters given objective values
    # 2. fitting separate densities l(x) and g(x) for 'good' and 'bad' trials
    # 3. selecting the next point to maximize l(x)/g(x), TPE's proxy for
    #    expected improvement (see the toy sketch after this listing)
    # 4. handling mixed parameter types (continuous, discrete, categorical)

    return best_params, optimizer.best_score_


if __name__ == "__main__":
    best_params, best_score = main()
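
The "Parameter space exploration" notes above reference TPE's l(x)/g(x) ratio. Below is a minimal toy sketch of that idea on a one-dimensional minimization problem, assuming only numpy and scipy. It illustrates the technique, not Optuna's actual implementation: the name toy_tpe_step is hypothetical, and real TPE adds prior smoothing, per-dimension bandwidths, and mixed-type handling.

import numpy as np
from scipy.stats import gaussian_kde


def f(x):
    return (x - 1.3) ** 2  # toy objective, minimum at x = 1.3


def toy_tpe_step(xs, ys, rng, gamma=0.25, n_candidates=24):
    """Propose the next x from past observations (lower y is better)."""
    cutoff = np.quantile(ys, gamma)
    good = xs[ys <= cutoff]        # best gamma-fraction of trials
    bad = xs[ys > cutoff]          # everything else
    l_good = gaussian_kde(good)    # density of good parameters, l(x)
    g_bad = gaussian_kde(bad)      # density of bad parameters, g(x)
    candidates = rng.uniform(-5.0, 5.0, n_candidates)
    # Maximizing l(x)/g(x) is TPE's proxy for expected improvement
    return candidates[np.argmax(l_good(candidates) / g_bad(candidates))]


rng = np.random.default_rng(42)
xs = rng.uniform(-5.0, 5.0, 10)    # 10 random startup trials
ys = f(xs)
for _ in range(20):                # 20 TPE-guided trials
    x_next = toy_tpe_step(xs, ys, rng)
    xs, ys = np.append(xs, x_next), np.append(ys, f(x_next))
print(f"best x: {xs[np.argmin(ys)]:.3f}, best y: {ys.min():.4f}")

Here gamma is the quantile that splits good from bad trials, the 10 startup points mirror n_startup_trials, and n_candidates=24 mirrors the n_ei_candidates setting used in the example above.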