Passed · Pull Request — master (#101) · by Simon · created 01:33

BaseOptimizer.add_search() · grade B

Complexity
    Conditions     3

Size
    Total Lines   73
    Code Lines    44

Duplication
    Lines          0
    Ratio          0 %

Importance
    Changes        0

Metric   Value
cc           3    cyclomatic complexity (cf. Conditions)
eloc        44    effective lines of code (cf. Code Lines)
nop         15    number of parameters
dl           0    duplicated lines (cf. Duplication)
loc         73    lines of code (cf. Total Lines)
rs       8.824
c            0
b            0
f            0

How to fix

Long Method

Small methods make your code easier to understand, especially when combined with a good name. Besides, when a method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method, Replace Temp with Query, and Decompose Conditional; a sketch of Extract Method follows below.
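For instance, a minimal sketch of Extract Method applied to the add_search() listing further down (a hypothetical refactoring for illustration, not a change proposed in this pull request): the block that normalizes optional arguments could move into a helper whose name states its purpose.

    # Hypothetical helper extracted from add_search(); the name
    # _normalize_optional_args is illustrative, not part of the codebase.
    def _normalize_optional_args(self, constraints, pass_through, early_stopping):
        # Replace None with empty containers so later code can iterate safely.
        constraints = constraints or []
        pass_through = pass_through or {}
        early_stopping = early_stopping or {}
        return constraints, pass_through, early_stopping

    # Inside add_search(), the three normalization lines then collapse to:
    # constraints, pass_through, early_stopping = self._normalize_optional_args(
    #     constraints, pass_through, early_stopping
    # )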

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you need more, or different, data.

There are several approaches to avoiding long parameter lists, such as Introduce Parameter Object (bundle related values into a single object), Preserve Whole Object (pass the object the values come from instead of extracting them first), and Replace Parameter with Method Call; a sketch follows below.
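As a minimal sketch of Introduce Parameter Object (the class name SearchSettings and the chosen grouping are assumptions for illustration, not part of this codebase), several run-control parameters of add_search() could be bundled into one object:

    from dataclasses import dataclass, field
    from typing import Dict, Optional

    # Hypothetical parameter object; name and field selection are illustrative.
    @dataclass
    class SearchSettings:
        n_iter: int
        n_jobs: int = 1
        max_score: Optional[float] = None
        random_state: Optional[int] = None
        early_stopping: Dict = field(default_factory=dict)

    # add_search(experiment, search_space, settings=SearchSettings(n_iter=100))
    # would then replace five separate parameters with a single, extensible object.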

"""Base class for optimizer."""

from typing import Union, List, Dict, Callable
import multiprocessing as mp
import pandas as pd

from .backend_stuff.search_space import SearchSpace
from .search import Search


from ..composite_optimizer import CompositeOptimizer

from skbase.base import BaseObject


class BaseOptimizer(BaseObject):
    """Base class for optimizer."""

    n_search: int
    searches: list
    opt_pros: dict

    def __init__(self, optimizer_class, opt_params):
        super().__init__()

        self.optimizer_class = optimizer_class
        self.opt_params = opt_params

        self.n_search = 0
        self.searches = []

    @staticmethod
    def _default_search_id(search_id, objective_function):
        # Fall back to the objective function's name when no ID is given.
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        # Warn about search-space dimensions that are not plain lists.
        for key in search_space.keys():
            search_dim = search_space[key]

            error_msg = (
                "Value in '{}' of search space dictionary "
                "must be of type list".format(key)
            )
            if not isinstance(search_dim, list):
                print("Warning:", error_msg)
                # raise ValueError(error_msg)

    def add_search(
        self,
        experiment: Callable,
        search_space: Dict[str, list],
        n_iter: int,
        search_id=None,
        n_jobs: int = 1,
        verbosity: list = None,
        initialize: Dict[str, int] = None,
        constraints: List[Callable] = None,
        pass_through: Dict = None,
        max_score: float = None,
        early_stopping: Dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with the given parameters.

        Parameters:
        - experiment: Experiment object containing the objective function to optimize.
        - search_space: Dictionary defining the search space for the optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None, which uses
          the objective function's name).
        - n_jobs: Number of parallel jobs to run; -1 uses all CPUs (default: 1).
        - verbosity: List of enabled output options
          (default: ["progress_bar", "print_results", "print_times"]).
        - initialize: Dictionary specifying initialization parameters
          (default: {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None).
        - pass_through: Dictionary of additional parameters to pass through (default: None).
        - max_score: Score at which the search stops early (default: None).
        - early_stopping: Dictionary specifying early-stopping criteria (default: None).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm-start memory (default: None).

        The callbacks and catch settings are taken from the experiment object.
        """

        self.n_search += 1

        self.check_list(search_space)

        # Avoid mutable default arguments by resolving None here.
        if verbosity is None:
            verbosity = ["progress_bar", "print_results", "print_times"]
        if initialize is None:
            initialize = {"grid": 4, "random": 2, "vertices": 4}
        constraints = constraints or []
        pass_through = pass_through or {}
        early_stopping = early_stopping or {}

        search_id = self._default_search_id(
            search_id, experiment.objective_function
        )
        s_space = SearchSpace(search_space)
        self.verbosity = verbosity

        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        # One Search instance per parallel job, all sharing the same setup.
        for _ in range(n_jobs):
            search = Search(self.optimizer_class, self.opt_params)
            search.setup(
                experiment=experiment,
                s_space=s_space,
                n_iter=n_iter,
                initialize=initialize,
                constraints=constraints,
                pass_through=pass_through,
                callbacks=experiment.callbacks,
                catch=experiment.catch,
                max_score=max_score,
                early_stopping=early_stopping,
                random_state=random_state,
                memory=memory,
                memory_warm_start=memory_warm_start,
                verbosity=verbosity,
            )
            self.searches.append(search)

    @property
    def nth_search(self):
        # Number of optimizers in the composite; available after run().
        return len(self.comp_opt.optimizers)

    def __add__(self, optimizer_instance):
        return CompositeOptimizer(self, optimizer_instance)

    def run(
        self,
        max_time=None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
    ):
        self.comp_opt = CompositeOptimizer(self)
        self.comp_opt.run(max_time, distribution, n_processes, self.verbosity)

    def best_para(self, id_):
        """
        Retrieve the best parameters for a specific search from the results.

        Parameters:
        - id_: Identifier of the search whose parameters to retrieve.

        Returns:
        - Union[Dict[str, Union[int, float]], None]: The best parameters for the
          specified search if found, otherwise None.

        Raises:
        - ValueError: If the objective function name is not recognized.
        """

        return self.comp_opt.results_.best_para(id_)

    def best_score(self, id_):
        """
        Return the best score for a specific search from the results.

        Parameters:
        - id_: Identifier of the search for which the best score is requested.
        """

        return self.comp_opt.results_.best_score(id_)

    def search_data(self, id_, times=False):
        """
        Retrieve the search data for a specific search from the results.

        Parameters:
        - id_: The search (e.g. an experiment object) whose objective_function
          is used to look up the data.
        - times (bool, optional): If False (default), the 'eval_times' and
          'iter_times' columns are dropped from the result.

        Returns:
        - pd.DataFrame: The search data for the specified search.
        """

        search_data_ = self.comp_opt.results_.search_data(
            id_.objective_function
        )

        if not times:
            search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                inplace=True,
                errors="ignore",
            )
        return search_data_
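For orientation, a minimal usage sketch of the class above (the concrete optimizer, experiment object, and search space here are assumptions for illustration, not values taken from this pull request):

    # Hypothetical usage; GridSearchOptimizer, grid_params, and my_experiment
    # are placeholders, not names confirmed by this diff.
    opt = BaseOptimizer(GridSearchOptimizer, opt_params=grid_params)
    opt.add_search(
        experiment=my_experiment,  # exposes objective_function, callbacks, catch
        search_space={"x": list(range(100))},
        n_iter=50,
    )
    opt.run(max_time=60)
    print(opt.best_para(my_experiment))
    print(opt.best_score(my_experiment))
    df = opt.search_data(my_experiment)  # objective_function is the lookup key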