hyperactive.hyperactive.Hyperactive.search_data()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 35
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 35
rs 9.95
c 0
b 0
f 0
cc 2
nop 3
1
"""Main Hyperactive module providing the primary optimization interface.
2
3
This module contains the Hyperactive class, which is the main entry point
4
for hyperparameter optimization. It provides methods to add optimization
5
searches, run them, and retrieve results.
6
7
Author: Simon Blanke
8
Email: [email protected]
9
License: MIT License
10
"""
11
12
import copy
13
import multiprocessing as mp
14
from typing import Union
15
16
import pandas as pd
17
18
from .optimizers import RandomSearchOptimizer
19
from .print_results import PrintResults
20
from .results import Results
21
from .run_search import run_search
22
from .search_space import SearchSpace
23
24
25
class Hyperactive:
    """
    Initialize the Hyperactive class to manage optimization processes.

    Parameters
    ----------
    - verbosity: List of verbosity levels, or False to disable all output
        (default: ["progress_bar", "print_results", "print_times"])
    - distribution: String indicating the distribution method
        (default: "multiprocessing")
    - n_processes: Number of processes to run in parallel or "auto"
        to determine automatically (default: "auto")

    Methods
    -------
    - add_search: Add a new optimization search process with specified parameters
    - run: Execute the optimization searches
    - best_para: Get the best parameters for a specific search
    - best_score: Get the best score for a specific search
    - search_data: Get the search data for a specific search
    """

    def __init__(
        self,
        verbosity: Union[list, bool, None] = None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
    ):
        super().__init__()
        # None is a sentinel standing in for the previous mutable default
        # list; building a fresh list here avoids sharing one list object
        # across instances (mutable-default-argument pitfall).
        if verbosity is None:
            verbosity = ["progress_bar", "print_results", "print_times"]
        elif verbosity is False:
            verbosity = []

        self.verbosity = verbosity
        self.distribution = distribution
        self.n_processes = n_processes

        # Maps nth_process (int) -> optimizer instance; filled by add_search.
        self.opt_pros = {}

    def _create_shared_memory(self):
        """Give optimizers with memory="share" a shared dict per objective.

        Optimizers are bundled by objective-function name; each bundle whose
        search spaces all have the same length receives one fresh
        multiprocessing manager dict.
        """
        _bundle_opt_processes = {}

        for opt_pros in self.opt_pros.values():
            if opt_pros.memory != "share":
                continue
            name = opt_pros.objective_function.__name__

            _bundle_opt_processes.setdefault(name, []).append(opt_pros)

        for opt_pros_l in _bundle_opt_processes.values():
            # Check if the lengths of the search spaces of all optimizers
            # in the list are the same.
            if len({len(opt_pros.s_space()) for opt_pros in opt_pros_l}) == 1:
                manager = mp.Manager()  # get new manager.dict
                shared_memory = manager.dict()
                for opt_pros in opt_pros_l:
                    opt_pros.memory = shared_memory
            else:
                # NOTE(review): at this point opt_pros_l[0].memory is still
                # the string "share", not a manager dict — confirm intended.
                for opt_pros in opt_pros_l:
                    opt_pros.memory = opt_pros_l[0].memory  # get same manager.dict

    @staticmethod
    def _default_opt(optimizer):
        """Resolve "default" to a RandomSearchOptimizer and deep-copy it.

        A deep copy is returned so each search owns an independent optimizer.
        """
        if isinstance(optimizer, str):
            if optimizer == "default":
                optimizer = RandomSearchOptimizer()
        return copy.deepcopy(optimizer)

    @staticmethod
    def _default_search_id(search_id, objective_function):
        """Set default search ID based on objective function name if not provided."""
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        """Validate that search space values are lists (warns, does not raise)."""
        for key, search_dim in search_space.items():
            error_msg = (
                f"Value in '{key}' of search space dictionary must be of type list"
            )
            if not isinstance(search_dim, list):
                print("Warning", error_msg)
                # raise ValueError(error_msg)

    def add_search(
        self,
        objective_function: callable,
        search_space: dict[str, list],
        n_iter: int,
        search_id=None,
        optimizer: Union[str, type[RandomSearchOptimizer]] = "default",
        n_jobs: int = 1,
        initialize: dict[str, int] = None,
        constraints: list[callable] = None,
        pass_through: dict = None,
        callbacks: dict[str, callable] = None,
        catch: dict = None,
        max_score: float = None,
        early_stopping: dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with specified parameters.

        Parameters
        ----------
        - objective_function: The objective function to optimize.
        - search_space: Dictionary defining the search space for optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None).
        - optimizer: The optimizer to use for the search process (default: "default").
        - n_jobs: Number of parallel jobs to run; -1 uses all CPUs (default: 1).
        - initialize: Dictionary specifying initialization parameters
            (default: {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None).
        - pass_through: Dictionary of additional parameters to pass through
            (default: None).
        - callbacks: Dictionary of callback functions (default: None).
        - catch: Dictionary of exceptions to catch during optimization (default: None).
        - max_score: Maximum score to achieve (default: None).
        - early_stopping: Dictionary specifying early stopping criteria (default: None).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm start memory (default: None).
        """
        self.check_list(search_space)

        # None stands in for the previous mutable default dict; a fresh dict
        # per call prevents one call's mutations (e.g. inside setup_search)
        # from leaking into later calls.
        if initialize is None:
            initialize = {"grid": 4, "random": 2, "vertices": 4}
        constraints = constraints or []
        pass_through = pass_through or {}
        callbacks = callbacks or {}
        catch = catch or {}
        early_stopping = early_stopping or {}

        optimizer = self._default_opt(optimizer)
        search_id = self._default_search_id(search_id, objective_function)
        s_space = SearchSpace(search_space)

        optimizer.setup_search(
            objective_function=objective_function,
            s_space=s_space,
            n_iter=n_iter,
            initialize=initialize,
            constraints=constraints,
            pass_through=pass_through,
            callbacks=callbacks,
            catch=catch,
            max_score=max_score,
            early_stopping=early_stopping,
            random_state=random_state,
            memory=memory,
            memory_warm_start=memory_warm_start,
            verbosity=self.verbosity,
        )

        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        # One process slot per job; note every slot of this search references
        # the same optimizer instance created above.
        for _ in range(n_jobs):
            nth_process = len(self.opt_pros)
            self.opt_pros[nth_process] = optimizer

    def _print_info(self):
        """Print per-process results according to the configured verbosity."""
        print_res = PrintResults(self.opt_pros, self.verbosity)

        if self.verbosity:
            for _ in range(len(self.opt_pros)):
                print("")

        for results in self.results_list:
            nth_process = results["nth_process"]
            print_res.print_process(results, nth_process)

    def run(self, max_time: float = None):
        """
        Run the optimization process with an optional maximum time limit.

        Args:
            max_time (float, optional): Maximum time limit for the optimization
                process. Defaults to None.
        """
        self._create_shared_memory()

        for opt in self.opt_pros.values():
            opt.max_time = max_time

        self.results_list = run_search(
            self.opt_pros, self.distribution, self.n_processes
        )

        # Aggregated results object; best_para/best_score/search_data
        # delegate to it, so run() must be called before they are used.
        self.results_ = Results(self.results_list, self.opt_pros)

        self._print_info()

    def best_para(self, id_):
        """
        Retrieve the best parameters for a specific ID from the results.

        Parameters
        ----------
        - id_ (int): The ID of the parameters to retrieve.

        Returns
        -------
        - Union[dict[str, Union[int, float]], None]: The best parameters for the
            specified ID if found, otherwise None.

        Raises
        ------
        - ValueError: If the objective function name is not recognized.
        """
        return self.results_.best_para(id_)

    def best_score(self, id_):
        """
        Return the best score for a specific ID from the results.

        Parameters
        ----------
        - id_ (int): The ID for which the best score is requested.
        """
        return self.results_.best_score(id_)

    def search_data(self, id_, times=False):
        """Retrieve search data for a specific ID from the results.

        Optionally exclude evaluation and iteration times if 'times' is set to False.

        Parameters
        ----------
        - id_ (int): The ID of the search data to retrieve.
        - times (bool, optional): Whether to keep evaluation and iteration
            times. Defaults to False (time columns are dropped).

        Returns
        -------
        - pd.DataFrame: The search data for the specified ID.

        columns are

            * "score" : float - The score of the search
            * "n_columns" : int - The number of columns in the search space
            * "metadata" : dict - The metadata returned by the search

        each row is a search iteration

        index is RangeIndex
        """
        search_data_ = self.results_.search_data(id_)

        if not times:
            # errors="ignore" tolerates frames that lack the time columns.
            search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                inplace=True,
                errors="ignore",
            )
        return search_data_