| Metric | Value |
| --- | --- |
| Conditions | 1 |
| Total Lines | 72 |
| Code Lines | 29 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Moreover, if a method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for naming it.
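As a hedged sketch (the order-processing function and the helpers it calls are hypothetical, not taken from the analyzed code), extracting a commented block into a method named after the comment looks like this:

```python
# Before: a comment explains what the next few lines do.
def process_order(order):
    # calculate the total price including tax
    total = sum(item.price * item.quantity for item in order.items)
    total *= 1.19
    send_confirmation(order, total)  # hypothetical helper


# After: the commented block becomes a small method named after the comment.
def process_order(order):
    total = calculate_total_with_tax(order)
    send_confirmation(order, total)


def calculate_total_with_tax(order, tax_rate=1.19):
    """Return the order total including tax."""
    return sum(item.price * item.quantity for item in order.items) * tax_rate
```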
Commonly applied refactorings include Extract Method, as illustrated above. If many parameters or temporary variables make extraction awkward, one commonly applied option is to bundle related parameters into a single object, as sketched below.
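A minimal sketch of that parameter-bundling step, often called Introduce Parameter Object (the report function and dataclass names here are hypothetical):

```python
from dataclasses import dataclass


# Before: many loosely related parameters travel together through the call chain.
def create_report(start_date, end_date, customer_id, include_tax, currency):
    ...


# After: the related parameters are grouped into one object with a clear name.
@dataclass
class ReportRequest:
    start_date: str
    end_date: str
    customer_id: int
    include_tax: bool = True
    currency: str = "EUR"


def create_report(request: ReportRequest):
    ...
```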
1 | """ |
||
25 | def main(): |
||
26 | # === TPESampler Example === |
||
27 | # Tree-structured Parzen Estimator - Bayesian Optimization |
||
28 | |||
29 | # Load dataset |
||
30 | X, y = load_wine(return_X_y=True) |
||
31 | print(f"Dataset: Wine classification ({X.shape[0]} samples, {X.shape[1]} features)") |
||
32 | |||
33 | # Create experiment |
||
34 | estimator = RandomForestClassifier(random_state=42) |
||
35 | experiment = SklearnCvExperiment(estimator=estimator, X=X, y=y, cv=3) |
||
36 | |||
37 | # Define search space |
||
38 | param_space = { |
||
39 | "n_estimators": (10, 200), # Continuous integer |
||
40 | "max_depth": (1, 20), # Continuous integer |
||
41 | "min_samples_split": (2, 20), # Continuous integer |
||
42 | "min_samples_leaf": (1, 10), # Continuous integer |
||
43 | "max_features": ["sqrt", "log2", None], # Categorical |
||
44 | "bootstrap": [True, False], # Categorical boolean |
||
45 | } |
||
46 | |||
47 | # Search Space: |
||
48 | # for param, space in param_space.items(): |
||
49 | # print(f" {param}: {space}") |
||
50 | |||
51 | # Configure TPESampler with warm start |
||
52 | warm_start_points = [ |
||
53 | {"n_estimators": 100, "max_depth": 10, "min_samples_split": 2, |
||
54 | "min_samples_leaf": 1, "max_features": "sqrt", "bootstrap": True} |
||
55 | ] |
||
56 | |||
57 | optimizer = TPESampler( |
||
58 | param_space=param_space, |
||
59 | n_trials=50, |
||
60 | random_state=42, |
||
61 | initialize={"warm_start": warm_start_points}, |
||
62 | experiment=experiment, |
||
63 | n_startup_trials=10, # Random trials before TPE kicks in |
||
64 | n_ei_candidates=24 # Number of candidates for expected improvement |
||
65 | ) |
||
66 | |||
67 | # TPESampler Configuration: |
||
68 | # n_trials: configured above |
||
69 | # n_startup_trials: random exploration phase |
||
70 | # n_ei_candidates: number of expected improvement candidates |
||
71 | # warm_start: initial point(s) provided |
||
72 | |||
73 | # Run optimization |
||
74 | # Running optimization... |
||
75 | best_params = optimizer.run() |
||
76 | |||
77 | # Results |
||
78 | print("\n=== Results ===") |
||
79 | print(f"Best parameters: {best_params}") |
||
80 | print(f"Best score: {optimizer.best_score_:.4f}") |
||
81 | print() |
||
82 | |||
83 | # TPE Behavior Analysis: |
||
84 | # - First 10 trials: Random exploration (n_startup_trials) |
||
85 | # - Trials 11-50: TPE-guided exploration based on past results |
||
86 | # - TPE builds probabilistic models of good vs bad parameter regions |
||
87 | # - Balances exploration of uncertain areas with exploitation of promising regions |
||
88 | |||
89 | # Parameter Space Exploration: |
||
90 | # TPESampler effectively explores the joint parameter space by: |
||
91 | # 1. Modeling P(x|y) - probability of parameters given objective values |
||
92 | # 2. Using separate models for 'good' and 'bad' performing regions |
||
93 | # 3. Selecting next points to maximize expected improvement |
||
94 | # 4. Handling mixed parameter types (continuous, discrete, categorical) |
||
95 | |||
96 | return best_params, optimizer.best_score_ |
||
97 | |||
101 |
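To run the example as a script, a standard entry-point guard (not shown in the listing above) can be appended:

```python
if __name__ == "__main__":
    best_params, best_score = main()
```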