hyperactive.hyperactive.Hyperactive.search_data()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 35
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 35
rs 9.95
c 0
b 0
f 0
cc 2
nop 3
1
"""Main Hyperactive module providing the primary optimization interface.
2
3
This module contains the Hyperactive class, which is the main entry point
4
for hyperparameter optimization. It provides methods to add optimization
5
searches, run them, and retrieve results.
6
7
Author: Simon Blanke
8
Email: [email protected]
9
License: MIT License
10
"""
11
12
import copy
13
import multiprocessing as mp
14
from typing import Union
15
16
import pandas as pd
17
18
from .optimizers import RandomSearchOptimizer
19
from .print_results import PrintResults
20
from .results import Results
21
from .run_search import run_search
22
from .search_space import SearchSpace
23
24
25
class Hyperactive:
    """
    Initialize the Hyperactive class to manage optimization processes.

    Parameters
    ----------
    - verbosity: List of verbosity levels, or False to disable all output
        (default: ["progress_bar", "print_results", "print_times"])
    - distribution: String indicating the distribution method
        (default: "multiprocessing")
    - n_processes: Number of processes to run in parallel or "auto"
        to determine automatically (default: "auto")

    Methods
    -------
    - add_search: Add a new optimization search process with specified parameters
    - run: Execute the optimization searches
    - best_para: Get the best parameters for a specific search
    - best_score: Get the best score for a specific search
    - search_data: Get the search data for a specific search
    """

    def __init__(
        self,
        verbosity: Union[list, bool, None] = None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
    ):
        super().__init__()
        # None is a sentinel standing in for the previous mutable default
        # list; building a fresh list here avoids sharing one list object
        # across instances (mutable-default-argument pitfall).
        if verbosity is None:
            verbosity = ["progress_bar", "print_results", "print_times"]
        elif verbosity is False:
            verbosity = []

        self.verbosity = verbosity
        self.distribution = distribution
        self.n_processes = n_processes

        # Maps nth_process (int) -> optimizer instance; filled by add_search.
        self.opt_pros = {}

    def _create_shared_memory(self):
        """Give optimizers with memory="share" a shared dict per objective.

        Optimizers are bundled by objective-function name; each bundle whose
        search spaces all have the same length receives one fresh
        multiprocessing manager dict.
        """
        _bundle_opt_processes = {}

        for opt_pros in self.opt_pros.values():
            if opt_pros.memory != "share":
                continue
            name = opt_pros.objective_function.__name__

            _bundle_opt_processes.setdefault(name, []).append(opt_pros)

        for opt_pros_l in _bundle_opt_processes.values():
            # Check if the lengths of the search spaces of all optimizers
            # in the list are the same.
            if len({len(opt_pros.s_space()) for opt_pros in opt_pros_l}) == 1:
                manager = mp.Manager()  # get new manager.dict
                shared_memory = manager.dict()
                for opt_pros in opt_pros_l:
                    opt_pros.memory = shared_memory
            else:
                # NOTE(review): at this point opt_pros_l[0].memory is still
                # the string "share", not a manager dict — confirm intended.
                for opt_pros in opt_pros_l:
                    opt_pros.memory = opt_pros_l[0].memory  # get same manager.dict

    @staticmethod
    def _default_opt(optimizer):
        """Resolve "default" to a RandomSearchOptimizer and deep-copy it.

        A deep copy is returned so each search owns an independent optimizer.
        """
        if isinstance(optimizer, str):
            if optimizer == "default":
                optimizer = RandomSearchOptimizer()
        return copy.deepcopy(optimizer)

    @staticmethod
    def _default_search_id(search_id, objective_function):
        """Set default search ID based on objective function name if not provided."""
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        """Validate that search space values are lists (warns, does not raise)."""
        for key, search_dim in search_space.items():
            error_msg = (
                f"Value in '{key}' of search space dictionary must be of type list"
            )
            if not isinstance(search_dim, list):
                print("Warning", error_msg)
                # raise ValueError(error_msg)

    def add_search(
        self,
        objective_function: callable,
        search_space: dict[str, list],
        n_iter: int,
        search_id=None,
        optimizer: Union[str, type[RandomSearchOptimizer]] = "default",
        n_jobs: int = 1,
        initialize: dict[str, int] = None,
        constraints: list[callable] = None,
        pass_through: dict = None,
        callbacks: dict[str, callable] = None,
        catch: dict = None,
        max_score: float = None,
        early_stopping: dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with specified parameters.

        Parameters
        ----------
        - objective_function: The objective function to optimize.
        - search_space: Dictionary defining the search space for optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None).
        - optimizer: The optimizer to use for the search process (default: "default").
        - n_jobs: Number of parallel jobs to run; -1 uses all CPUs (default: 1).
        - initialize: Dictionary specifying initialization parameters
            (default: {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None).
        - pass_through: Dictionary of additional parameters to pass through
            (default: None).
        - callbacks: Dictionary of callback functions (default: None).
        - catch: Dictionary of exceptions to catch during optimization (default: None).
        - max_score: Maximum score to achieve (default: None).
        - early_stopping: Dictionary specifying early stopping criteria (default: None).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm start memory (default: None).
        """
        self.check_list(search_space)

        # None stands in for the previous mutable default dict; a fresh dict
        # per call prevents one call's mutations (e.g. inside setup_search)
        # from leaking into later calls.
        if initialize is None:
            initialize = {"grid": 4, "random": 2, "vertices": 4}
        constraints = constraints or []
        pass_through = pass_through or {}
        callbacks = callbacks or {}
        catch = catch or {}
        early_stopping = early_stopping or {}

        optimizer = self._default_opt(optimizer)
        search_id = self._default_search_id(search_id, objective_function)
        s_space = SearchSpace(search_space)

        optimizer.setup_search(
            objective_function=objective_function,
            s_space=s_space,
            n_iter=n_iter,
            initialize=initialize,
            constraints=constraints,
            pass_through=pass_through,
            callbacks=callbacks,
            catch=catch,
            max_score=max_score,
            early_stopping=early_stopping,
            random_state=random_state,
            memory=memory,
            memory_warm_start=memory_warm_start,
            verbosity=self.verbosity,
        )

        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        # One process slot per job; note every slot of this search references
        # the same optimizer instance created above.
        for _ in range(n_jobs):
            nth_process = len(self.opt_pros)
            self.opt_pros[nth_process] = optimizer

    def _print_info(self):
        """Print per-process results according to the configured verbosity."""
        print_res = PrintResults(self.opt_pros, self.verbosity)

        if self.verbosity:
            for _ in range(len(self.opt_pros)):
                print("")

        for results in self.results_list:
            nth_process = results["nth_process"]
            print_res.print_process(results, nth_process)

    def run(self, max_time: float = None):
        """
        Run the optimization process with an optional maximum time limit.

        Args:
            max_time (float, optional): Maximum time limit for the optimization
                process. Defaults to None.
        """
        self._create_shared_memory()

        for opt in self.opt_pros.values():
            opt.max_time = max_time

        self.results_list = run_search(
            self.opt_pros, self.distribution, self.n_processes
        )

        # Aggregated results object; best_para/best_score/search_data
        # delegate to it, so run() must be called before they are used.
        self.results_ = Results(self.results_list, self.opt_pros)

        self._print_info()

    def best_para(self, id_):
        """
        Retrieve the best parameters for a specific ID from the results.

        Parameters
        ----------
        - id_ (int): The ID of the parameters to retrieve.

        Returns
        -------
        - Union[dict[str, Union[int, float]], None]: The best parameters for the
            specified ID if found, otherwise None.

        Raises
        ------
        - ValueError: If the objective function name is not recognized.
        """
        return self.results_.best_para(id_)

    def best_score(self, id_):
        """
        Return the best score for a specific ID from the results.

        Parameters
        ----------
        - id_ (int): The ID for which the best score is requested.
        """
        return self.results_.best_score(id_)

    def search_data(self, id_, times=False):
        """Retrieve search data for a specific ID from the results.

        Optionally exclude evaluation and iteration times if 'times' is set to False.

        Parameters
        ----------
        - id_ (int): The ID of the search data to retrieve.
        - times (bool, optional): Whether to keep evaluation and iteration
            times. Defaults to False (time columns are dropped).

        Returns
        -------
        - pd.DataFrame: The search data for the specified ID.

        columns are

            * "score" : float - The score of the search
            * "n_columns" : int - The number of columns in the search space
            * "metadata" : dict - The metadata returned by the search

        each row is a search iteration

        index is RangeIndex
        """
        search_data_ = self.results_.search_data(id_)

        if not times:
            # errors="ignore" tolerates frames that lack the time columns.
            search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                inplace=True,
                errors="ignore",
            )
        return search_data_