Code Duplication - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 127-139 lines in 2 locations

examples/optuna/nsga_iii_sampler_example.py 1 location


    #    - Preserve solutions near each reference point


def main():
    # === NSGAIIISampler Example ===
    # Many-objective Optimization with NSGA-III

    nsga_iii_theory()

    # Load dataset
    X, y = load_breast_cancer(return_X_y=True)
    print(
        f"Dataset: Breast cancer classification ({X.shape[0]} samples, {X.shape[1]} features)"
    )

    # Create many-objective experiment
    experiment = ManyObjectiveExperiment(X, y)

    # Many-objective Problem (4 objectives):
    #   Objective 1: Maximize classification accuracy
    #   Objective 2: Minimize model complexity (tree depth)
    #   Objective 3: Minimize training time
    #   Objective 4: Maximize interpretability (smaller trees)
    #   → Complex trade-offs between multiple conflicting goals

    # Define search space
    param_space = {
        "max_depth": (1, 20),  # Tree depth
        "min_samples_split": (2, 50),  # Minimum samples to split
        "min_samples_leaf": (1, 20),  # Minimum samples per leaf
        "max_leaf_nodes": (10, 200),  # Maximum leaf nodes
        "criterion": ["gini", "entropy"],  # Split criterion
    }

    # Search Space:
    # for param, space in param_space.items():
    #   print(f"  {param}: {space}")

    # Configure NSGAIIISampler
    optimizer = NSGAIIISampler(
        param_space=param_space,
        n_trials=60,  # More trials needed for many objectives
        random_state=42,
        experiment=experiment,
        population_size=24,  # Larger population for many objectives
        mutation_prob=0.1,  # Mutation probability
        crossover_prob=0.9,  # Crossover probability
    )

    # NSGAIIISampler Configuration:
    # n_trials: configured above
    # population_size: larger for many objectives
    # mutation_prob: mutation probability
    # crossover_prob: crossover probability
    # Selection: Reference point-based diversity preservation

    # Note: NSGA-III is designed for 3+ objectives.
    # For 2 objectives, NSGA-II is typically preferred.
    # This example demonstrates the interface for many-objective problems.

    # Run optimization
    # Running NSGA-III many-objective optimization...

    try:
        best_params = optimizer.solve()

        # Results
        print("\n=== Results ===")
        print(f"Best parameters: {best_params}")
        print(f"Best score: {optimizer.best_score_:.4f}")
        print()

        # NSGA-III produces a diverse set of solutions across 4D Pareto front:
        #  High accuracy, complex, slower models
        #  Balanced accuracy/complexity trade-offs
        #  Fast, simple, interpretable models
        #  Various combinations optimizing different objectives

    except Exception as e:
        print(f"Many-objective optimization example: {e}")
        print("Note: This demonstrates the interface for many-objective problems.")
        return None, None

    # NSGA-III vs NSGA-II for Many Objectives:
    #
    # NSGA-II Limitations (3+ objectives):
    #  Most solutions become non-dominated
    #  Crowding distance loses effectiveness
    #  Selection pressure decreases
    #  Uneven distribution in objective space

    # NSGA-III Advantages:
    #  Reference points guide search directions
    #  Maintains diversity across all objectives
    #  Better selection pressure in many objectives
    #  Structured exploration of objective space
    #  Adaptive normalization handles different scales

    # Reference Point Mechanism:
    #  Systematic placement on normalized hyperplane
    #  Each point represents a different objective priority
    #  Solutions associated with nearest reference points
    #  Selection maintains balance across all points
    #  Prevents clustering in limited objective regions

    # Many-objective Problem Characteristics:
    #
    # Challenges:
    #  Exponential growth of non-dominated solutions
    #  Difficulty visualizing high-dimensional trade-offs
    #  User preference articulation becomes complex
    #  Increased computational requirements

    # Best Use Cases:
    #  Engineering design with multiple constraints
    #  Multi-criteria decision making (3+ criteria)
    #  Resource allocation problems
    #  System optimization with conflicting requirements
    #  When objective interactions are complex
    #
    # Algorithm Selection Guide:
    #
    # Use NSGA-III when:
    #    3 or more objectives
    #    Objectives are truly conflicting
    #    Comprehensive trade-off analysis needed
    #    Reference point guidance is beneficial
    #
    # Use NSGA-II when:
    #    2 objectives
    #    Simpler Pareto front structure
    #    Established performance for bi-objective problems
    #
    # Use single-objective methods when:
    #    Can formulate as weighted combination
    #    Clear primary objective with constraints
    #    Computational efficiency is critical

    if "best_params" in locals():
        return best_params, optimizer.best_score_
    else:
        return None, None


if __name__ == "__main__":

examples/optuna/nsga_ii_sampler_example.py 1 location


    #    - User chooses final solution based on preferences


def main():
    # === NSGAIISampler Example ===
    # Multi-objective Optimization with NSGA-II

    nsga_ii_theory()

    # Load dataset
    X, y = load_digits(return_X_y=True)
    print(f"Dataset: Handwritten digits ({X.shape[0]} samples, {X.shape[1]} features)")

    # Create multi-objective experiment
    experiment = MultiObjectiveExperiment(X, y)

    # Multi-objective Problem:
    #   Objective 1: Maximize classification accuracy
    #   Objective 2: Minimize model complexity
    #   → Trade-off between performance and simplicity

    # Define search space
    param_space = {
        "n_estimators": (10, 200),  # Number of trees
        "max_depth": (1, 20),  # Tree depth (complexity)
        "min_samples_split": (2, 20),  # Minimum samples to split
        "min_samples_leaf": (1, 10),  # Minimum samples per leaf
        "max_features": ["sqrt", "log2", None],  # Feature sampling
    }

    # Search Space:
    # for param, space in param_space.items():
    #   print(f"  {param}: {space}")

    # Configure NSGAIISampler
    optimizer = NSGAIISampler(
        param_space=param_space,
        n_trials=50,  # Population evolves over multiple generations
        random_state=42,
        experiment=experiment,
        population_size=20,  # Population size for genetic algorithm
        mutation_prob=0.1,  # Mutation probability
        crossover_prob=0.9,  # Crossover probability
    )

    # NSGAIISampler Configuration:
    # n_trials: configured above
    # population_size: for genetic algorithm
    # mutation_prob: mutation probability
    # crossover_prob: crossover probability
    # Selection: Non-dominated sorting + crowding distance

    # Note: This example demonstrates the interface.
    # In practice, NSGA-II returns multiple Pareto-optimal solutions.
    # For single-objective problems, consider TPE or GP samplers instead.

    # Run optimization
    # Running NSGA-II multi-objective optimization...

    try:
        best_params = optimizer.solve()

        # Results
        print("\n=== Results ===")
        print(f"Best parameters: {best_params}")
        print(f"Best score: {optimizer.best_score_:.4f}")
        print()

        # NSGA-II typically returns multiple solutions along Pareto front:
        #  High accuracy, high complexity models
        #  Medium accuracy, medium complexity models
        #  Lower accuracy, low complexity models
        #  User selects based on preferences/constraints

    except Exception as e:
        print(f"Multi-objective optimization example: {e}")
        print("Note: This demonstrates the interface for multi-objective problems.")
        return None, None

    # NSGA-II Evolution Process:
    #
    # Generation 1: Random initialization
    #  Diverse population across parameter space
    #  Wide range of accuracy/complexity trade-offs

    # Generations 2-N: Evolutionary improvement
    #  Non-dominated sorting identifies best fronts
    #  Crowding distance maintains solution diversity
    #  Crossover combines good solutions
    #  Mutation explores new parameter regions

    # Final Population: Pareto front approximation
    #  Multiple non-dominated solutions
    #  Represents optimal trade-offs
    #  User chooses based on domain requirements

    # Key Advantages:
    #  Handles multiple conflicting objectives naturally
    #  Finds diverse set of optimal trade-offs
    #  No need to specify objective weights a priori
    #  Provides insight into objective relationships
    #  Robust to objective scaling differences

    # Best Use Cases:
    #  True multi-objective problems (accuracy vs speed, cost vs quality)
    #  When trade-offs between objectives are important
    #  Robustness analysis with multiple criteria
    #  When single objective formulation is unclear

    # Limitations:
    #  More complex than single-objective methods
    #  Requires more evaluations (population-based)
    #  May be overkill for single-objective problems
    #  Final solution selection still required

    # When to Use NSGA-II vs Single-objective Methods:
    # Use NSGA-II when:
    #    Multiple objectives genuinely conflict
    #    Trade-off analysis is valuable
    #    Objective weights are unknown
    #
    # Use TPE/GP when:
    #    Single clear objective
    #    Computational budget is limited
    #    Faster convergence needed

    if "best_params" in locals():
        return best_params, optimizer.best_score_
    else:
        return None, None


if __name__ == "__main__":

		@@ 96-234 (lines=139) @@
93		# - Preserve solutions near each reference point
94
95
96		def main():
97		# === NSGAIIISampler Example ===
98		# Many-objective Optimization with NSGA-III
99
100		nsga_iii_theory()
101
102		# Load dataset
103		X, y = load_breast_cancer(return_X_y=True)
104		print(
105		f"Dataset: Breast cancer classification ({X.shape[0]} samples, {X.shape[1]} features)"
106		)
107
108		# Create many-objective experiment
109		experiment = ManyObjectiveExperiment(X, y)
110
111		# Many-objective Problem (4 objectives):
112		# Objective 1: Maximize classification accuracy
113		# Objective 2: Minimize model complexity (tree depth)
114		# Objective 3: Minimize training time
115		# Objective 4: Maximize interpretability (smaller trees)
116		# → Complex trade-offs between multiple conflicting goals
117
118		# Define search space
119		param_space = {
120		"max_depth": (1, 20), # Tree depth
121		"min_samples_split": (2, 50), # Minimum samples to split
122		"min_samples_leaf": (1, 20), # Minimum samples per leaf
123		"max_leaf_nodes": (10, 200), # Maximum leaf nodes
124		"criterion": ["gini", "entropy"], # Split criterion
125		}
126
127		# Search Space:
128		# for param, space in param_space.items():
129		# print(f" {param}: {space}")
130
131		# Configure NSGAIIISampler
132		optimizer = NSGAIIISampler(
133		param_space=param_space,
134		n_trials=60, # More trials needed for many objectives
135		random_state=42,
136		experiment=experiment,
137		population_size=24, # Larger population for many objectives
138		mutation_prob=0.1, # Mutation probability
139		crossover_prob=0.9, # Crossover probability
140		)
141
142		# NSGAIIISampler Configuration:
143		# n_trials: configured above
144		# population_size: larger for many objectives
145		# mutation_prob: mutation probability
146		# crossover_prob: crossover probability
147		# Selection: Reference point-based diversity preservation
148
149		# Note: NSGA-III is designed for 3+ objectives.
150		# For 2 objectives, NSGA-II is typically preferred.
151		# This example demonstrates the interface for many-objective problems.
152
153		# Run optimization
154		# Running NSGA-III many-objective optimization...
155
156		try:
157		best_params = optimizer.solve()
158
159		# Results
160		print("\n=== Results ===")
161		print(f"Best parameters: {best_params}")
162		print(f"Best score: {optimizer.best_score_:.4f}")
163		print()
164
165		# NSGA-III produces a diverse set of solutions across 4D Pareto front:
166		# High accuracy, complex, slower models
167		# Balanced accuracy/complexity trade-offs
168		# Fast, simple, interpretable models
169		# Various combinations optimizing different objectives
170
171		except Exception as e:
172		print(f"Many-objective optimization example: {e}")
173		print("Note: This demonstrates the interface for many-objective problems.")
174		return None, None
175
176		# NSGA-III vs NSGA-II for Many Objectives:
177		#
178		# NSGA-II Limitations (3+ objectives):
179		# Most solutions become non-dominated
180		# Crowding distance loses effectiveness
181		# Selection pressure decreases
182		# Uneven distribution in objective space
183
184		# NSGA-III Advantages:
185		# Reference points guide search directions
186		# Maintains diversity across all objectives
187		# Better selection pressure in many objectives
188		# Structured exploration of objective space
189		# Adaptive normalization handles different scales
190
191		# Reference Point Mechanism:
192		# Systematic placement on normalized hyperplane
193		# Each point represents a different objective priority
194		# Solutions associated with nearest reference points
195		# Selection maintains balance across all points
196		# Prevents clustering in limited objective regions
197
198		# Many-objective Problem Characteristics:
199		#
200		# Challenges:
201		# Exponential growth of non-dominated solutions
202		# Difficulty visualizing high-dimensional trade-offs
203		# User preference articulation becomes complex
204		# Increased computational requirements
205
206		# Best Use Cases:
207		# Engineering design with multiple constraints
208		# Multi-criteria decision making (3+ criteria)
209		# Resource allocation problems
210		# System optimization with conflicting requirements
211		# When objective interactions are complex
212		#
213		# Algorithm Selection Guide:
214		#
215		# Use NSGA-III when:
216		# 3 or more objectives
217		# Objectives are truly conflicting
218		# Comprehensive trade-off analysis needed
219		# Reference point guidance is beneficial
220		#
221		# Use NSGA-II when:
222		# 2 objectives
223		# Simpler Pareto front structure
224		# Established performance for bi-objective problems
225		#
226		# Use single-objective methods when:
227		# Can formulate as weighted combination
228		# Clear primary objective with constraints
229		# Computational efficiency is critical
230
231		if "best_params" in locals():
232		return best_params, optimizer.best_score_
233		else:
234		return None, None
235
236
237		if __name__ == "__main__":

		@@ 82-208 (lines=127) @@
79		# - User chooses final solution based on preferences
80
81
82		def main():
83		# === NSGAIISampler Example ===
84		# Multi-objective Optimization with NSGA-II
85
86		nsga_ii_theory()
87
88		# Load dataset
89		X, y = load_digits(return_X_y=True)
90		print(f"Dataset: Handwritten digits ({X.shape[0]} samples, {X.shape[1]} features)")
91
92		# Create multi-objective experiment
93		experiment = MultiObjectiveExperiment(X, y)
94
95		# Multi-objective Problem:
96		# Objective 1: Maximize classification accuracy
97		# Objective 2: Minimize model complexity
98		# → Trade-off between performance and simplicity
99
100		# Define search space
101		param_space = {
102		"n_estimators": (10, 200), # Number of trees
103		"max_depth": (1, 20), # Tree depth (complexity)
104		"min_samples_split": (2, 20), # Minimum samples to split
105		"min_samples_leaf": (1, 10), # Minimum samples per leaf
106		"max_features": ["sqrt", "log2", None], # Feature sampling
107		}
108
109		# Search Space:
110		# for param, space in param_space.items():
111		# print(f" {param}: {space}")
112
113		# Configure NSGAIISampler
114		optimizer = NSGAIISampler(
115		param_space=param_space,
116		n_trials=50, # Population evolves over multiple generations
117		random_state=42,
118		experiment=experiment,
119		population_size=20, # Population size for genetic algorithm
120		mutation_prob=0.1, # Mutation probability
121		crossover_prob=0.9, # Crossover probability
122		)
123
124		# NSGAIISampler Configuration:
125		# n_trials: configured above
126		# population_size: for genetic algorithm
127		# mutation_prob: mutation probability
128		# crossover_prob: crossover probability
129		# Selection: Non-dominated sorting + crowding distance
130
131		# Note: This example demonstrates the interface.
132		# In practice, NSGA-II returns multiple Pareto-optimal solutions.
133		# For single-objective problems, consider TPE or GP samplers instead.
134
135		# Run optimization
136		# Running NSGA-II multi-objective optimization...
137
138		try:
139		best_params = optimizer.solve()
140
141		# Results
142		print("\n=== Results ===")
143		print(f"Best parameters: {best_params}")
144		print(f"Best score: {optimizer.best_score_:.4f}")
145		print()
146
147		# NSGA-II typically returns multiple solutions along Pareto front:
148		# High accuracy, high complexity models
149		# Medium accuracy, medium complexity models
150		# Lower accuracy, low complexity models
151		# User selects based on preferences/constraints
152
153		except Exception as e:
154		print(f"Multi-objective optimization example: {e}")
155		print("Note: This demonstrates the interface for multi-objective problems.")
156		return None, None
157
158		# NSGA-II Evolution Process:
159		#
160		# Generation 1: Random initialization
161		# Diverse population across parameter space
162		# Wide range of accuracy/complexity trade-offs
163
164		# Generations 2-N: Evolutionary improvement
165		# Non-dominated sorting identifies best fronts
166		# Crowding distance maintains solution diversity
167		# Crossover combines good solutions
168		# Mutation explores new parameter regions
169
170		# Final Population: Pareto front approximation
171		# Multiple non-dominated solutions
172		# Represents optimal trade-offs
173		# User chooses based on domain requirements
174
175		# Key Advantages:
176		# Handles multiple conflicting objectives naturally
177		# Finds diverse set of optimal trade-offs
178		# No need to specify objective weights a priori
179		# Provides insight into objective relationships
180		# Robust to objective scaling differences
181
182		# Best Use Cases:
183		# True multi-objective problems (accuracy vs speed, cost vs quality)
184		# When trade-offs between objectives are important
185		# Robustness analysis with multiple criteria
186		# When single objective formulation is unclear
187
188		# Limitations:
189		# More complex than single-objective methods
190		# Requires more evaluations (population-based)
191		# May be overkill for single-objective problems
192		# Final solution selection still required
193
194		# When to Use NSGA-II vs Single-objective Methods:
195		# Use NSGA-II when:
196		# Multiple objectives genuinely conflict
197		# Trade-off analysis is valuable
198		# Objective weights are unknown
199		#
200		# Use TPE/GP when:
201		# Single clear objective
202		# Computational budget is limited
203		# Faster convergence needed
204
205		if "best_params" in locals():
206		return best_params, optimizer.best_score_
207		else:
208		return None, None
209
210
211		if __name__ == "__main__":

SimonBlanke / Hyperactive

Code Duplication Length = 127-139 lines in 2 locations

examples/optuna/nsga_iii_sampler_example.py 1 location

examples/optuna/nsga_ii_sampler_example.py 1 location