random_sampler_example - Code Metrics - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

random_sampler_example A
last analyzed 2025-08-17 14:42 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	120
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	2
eloc	34
dl	0
loc	120
rs	10
c	0
b	0
f	0

2 Functions

Rating	Name	Duplication	Size	Complexity
A	main()	0	78	1
A	objective_function_analysis()	0	2	1

"""
RandomSampler Example - Random Search

The RandomSampler performs pure random sampling from the parameter space.
It serves as a baseline and is surprisingly effective for many problems,
especially when the parameter space is high-dimensional or when you have
a limited computational budget.

Characteristics:
- No learning from previous trials
- Uniform sampling from parameter distributions
- Excellent baseline for comparison
- Works well in high-dimensional spaces
- Embarrassingly parallel
- Good when the objective function is noisy
"""

import numpy as np
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

from hyperactive.experiment.integrations import SklearnCvExperiment
from hyperactive.opt.optuna import RandomSampler


def objective_function_analysis():
    """Summarize the situations in which random sampling is effective.

    This function is documentation-only: it performs no computation and
    returns ``None``. Random sampling tends to work well:

    1. In high-dimensional parameter spaces (curse of dimensionality).
    2. With noisy objective functions.
    3. Under a limited computational budget.
    4. As a baseline for comparison.
    5. When parallel evaluation is important.
    6. When uniform exploration is desired.
    """
    return None


def main():
    """Tune an ``SVC`` on the digits dataset with pure random search.

    Wraps the estimator in a ``SklearnCvExperiment`` (``cv=3``), lets
    ``RandomSampler`` run 30 trials over the SVM search space with a fixed
    ``random_state``, and prints the dataset summary and the best result.

    Returns the pair ``(best_params, best_score)``: the value returned by
    ``solve()`` and the optimizer's ``best_score_`` attribute.
    """
    objective_function_analysis()

    # SVM hyperparameters to sample from.
    search_space = {
        # Regularization strength - a log scale would suit this range better.
        "C": (0.001, 1000),
        # RBF kernel coefficient.
        "gamma": (1e-6, 1e2),
        # Kernel type.
        "kernel": ["rbf", "poly", "sigmoid"],
        # Polynomial degree (only relevant for the poly kernel).
        "degree": (2, 5),
        # Independent term (poly/sigmoid kernels).
        "coef0": (0.0, 10.0),
    }

    # Digits is used as a more challenging problem than a toy dataset.
    X, y = load_digits(return_X_y=True)
    print(f"Dataset: Handwritten digits ({X.shape[0]} samples, {X.shape[1]} features)")

    # Cross-validated experiment around the estimator under tuning.
    cv_experiment = SklearnCvExperiment(
        estimator=SVC(random_state=42),
        X=X,
        y=y,
        cv=3,
    )

    # Random search has no learning parameters: only the trial budget and a
    # seed for reproducible sampling are configured.
    sampler = RandomSampler(
        experiment=cv_experiment,
        param_space=search_space,
        random_state=42,
        n_trials=30,
    )

    best_params = sampler.solve()
    best_score = sampler.best_score_

    print("\n=== Results ===")
    print(f"Best parameters: {best_params}")
    print(f"Best score: {best_score:.4f}")
    print()

    # How random sampling behaves:
    #  - every trial is independent; nothing is learned from history
    #  - the parameter space is covered uniformly
    #  - there are no convergence or local-optimum concerns
    #  - trials are embarrassingly parallel and could run simultaneously
    #  - continuous, discrete and categorical parameters are handled alike
    #
    # Compared with grid search:
    #   + better coverage in high dimensions
    #   + more efficient for continuous parameters
    #   - no systematic coverage guarantee
    #
    # Compared with Bayesian optimization (TPE, GP):
    #   + no smoothness assumptions about the objective
    #   + copes well with noisy objectives
    #   + no risk of model misspecification
    #   - promising regions are not exploited
    #   - trials may be wasted on clearly bad regions
    #
    # When to use it:
    #  - as a baseline for validating more sophisticated methods
    #  - as a first choice when the objective is very noisy
    #  - in parallel optimization setups
    #  - for high-dimensional problems (>10 parameters)
    #  - together with log-uniform distributions for scale-sensitive parameters

    return best_params, best_score


# Script entry point: run the example only when this file is executed
# directly, binding the pair returned by main() to module-level names.
if __name__ == "__main__":
    best_params, best_score = main()


1			"""
2			RandomSampler Example - Random Search
3
4			The RandomSampler performs pure random sampling from the parameter space.
5			It serves as a baseline and is surprisingly effective for many problems,
6			especially when the parameter space is high-dimensional or when you have
7			limited computational budget.
8
9			Characteristics:
10			- No learning from previous trials
11			- Uniform sampling from parameter distributions
12			- Excellent baseline for comparison
13			- Works well in high-dimensional spaces
14			- Embarrassingly parallel
15			- Good when objective function is noisy
16			"""
17
18			import numpy as np
19			from sklearn.datasets import load_digits
20			from sklearn.svm import SVC
21			from sklearn.model_selection import cross_val_score
22
23			from hyperactive.experiment.integrations import SklearnCvExperiment
24			from hyperactive.opt.optuna import RandomSampler
25
26
27			def objective_function_analysis():
28			"""Demonstrate when random sampling is effective."""
29			# When Random Sampling Works Well:
30			# 1. High-dimensional parameter spaces (curse of dimensionality)
31			# 2. Noisy objective functions
32			# 3. Limited computational budget
33			# 4. As a baseline for comparison
34			# 5. When parallel evaluation is important
35			# 6. Uniform exploration is desired
36
37
38			def main():
39			# === RandomSampler Example ===
40			# Pure Random Search - Uniform Parameter Space Exploration
41
42			objective_function_analysis()
43
44			# Load dataset - using digits for a more challenging problem
45			X, y = load_digits(return_X_y=True)
46			print(f"Dataset: Handwritten digits ({X.shape[0]} samples, {X.shape[1]} features)")
47
48			# Create experiment
49			estimator = SVC(random_state=42)
50			experiment = SklearnCvExperiment(estimator=estimator, X=X, y=y, cv=3)
51
52			# Define search space - SVM hyperparameters
53			param_space = {
54			"C": (0.001, 1000), # Regularization - log scale would be better
55			"gamma": (1e-6, 1e2), # RBF kernel parameter
56			"kernel": ["rbf", "poly", "sigmoid"], # Kernel type
57			"degree": (2, 5), # Polynomial degree (only for poly kernel)
58			"coef0": (0.0, 10.0), # Independent term (poly/sigmoid)
59			}
60
61			# Search Space:
62			# for param, space in param_space.items():
63			# print(f" {param}: {space}")
64
65			# Configure RandomSampler
66			optimizer = RandomSampler(
67			param_space=param_space,
68			n_trials=30, # More trials to show random behavior
69			random_state=42, # For reproducible random sampling
70			experiment=experiment,
71			)
72
73			# RandomSampler Configuration:
74			# n_trials: configured above
75			# random_state: set for reproducibility
76			# No learning parameters - pure random sampling
77
78			# Run optimization
79			# Running random search...
80			best_params = optimizer.solve()
81
82			# Results
83			print("\n=== Results ===")
84			print(f"Best parameters: {best_params}")
85			print(f"Best score: {optimizer.best_score_:.4f}")
86			print()
87
88			# Analysis of Random Sampling behavior:
89			# Each trial is independent - no learning from history
90			# Uniform coverage of parameter space
91			# No convergence issues or local optima concerns
92			# Embarrassingly parallel - can run trials simultaneously
93			# Works equally well for continuous, discrete, and categorical parameters
94
95			# Comparison with Other Methods:
96			# vs Grid Search:
97			# + Better coverage in high dimensions
98			# + More efficient for continuous parameters
99			# - No systematic coverage guarantee
100			#
101			# vs Bayesian Optimization (TPE, GP):
102			# + No assumptions about objective function smoothness
103			# + Works well with noisy objectives
104			# + No risk of model misspecification
105			# - No exploitation of promising regions
106			# - May waste trials on clearly bad regions
107
108			# Practical Usage:
109			# Use as baseline to validate more sophisticated methods
110			# Good first choice when objective is very noisy
111			# Ideal for parallel optimization setups
112			# Consider for high-dimensional problems (>10 parameters)
113			# Use with log-uniform distributions for scale-sensitive parameters
114
115			return best_params, optimizer.best_score_
116
117
118			if __name__ == "__main__":
119			best_params, best_score = main()
120

SimonBlanke / Hyperactive

random_sampler_example A last analyzed 2025-08-17 14:42 UTC

Complexity

Size/Duplication

Importance

2 Functions

Duplication Side-by-Side

Filter issues like

random_sampler_example A
last analyzed 2025-08-17 14:42 UTC