Passed
Pull Request — master (#101)
by Simon
06:01
created

BaseOptimizer.add_search()   B

Complexity

Conditions 3

Size

Total Lines 71
Code Lines 42

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 42
nop 15
dl 0
loc 71
rs 8.872
c 0
b 0
f 0

How to fix   Long Method    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists, such as Replace Parameter with Method, Preserve Whole Object, and Introduce Parameter Object.

1
"""Base class for optimizer."""
2
3
from typing import Union, List, Dict
4
import multiprocessing as mp
5
import pandas as pd
6
7
from .backend_stuff.search_space import SearchSpace
8
from .search import Search
9
10
11
from ..composite_optimizer import CompositeOptimizer
12
13
from skbase.base import BaseObject
14
15
16
class BaseOptimizer(BaseObject):
    """Base class for optimizer.

    Collects ``Search`` objects through ``add_search`` and executes them by
    wrapping itself in a ``CompositeOptimizer`` when ``run`` is called.

    Parameters:
    - optimizer_class: Optimizer class handed to every ``Search`` that is created.
    - opt_params: Optimizer parameters handed to every ``Search`` that is created.
    """

    n_search: int
    searches: list
    opt_pros: dict

    def __init__(self, optimizer_class, opt_params):
        super().__init__()

        self.optimizer_class = optimizer_class
        self.opt_params = opt_params

        # Number of add_search() calls and the Search objects they created.
        self.n_search = 0
        self.searches = []

    @staticmethod
    def _default_search_id(search_id, objective_function):
        """Return ``search_id``, or the objective function's ``__name__`` if it is falsy."""
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        """
        Print a warning for every search-space entry whose value is not a list.

        Parameters:
        - search_space: Dictionary mapping parameter names to their candidate values.
        """
        for key, search_dim in search_space.items():
            if isinstance(search_dim, list):
                continue

            # Only warns: an invalid entry does not abort the call.
            error_msg = "Value in '{}' of search space dictionary must be of type list".format(
                key
            )
            print("Warning", error_msg)

    def add_search(
        self,
        experiment: callable,
        search_space: Dict[str, list],
        n_iter: int,
        search_id=None,
        n_jobs: int = 1,
        verbosity: list = ["progress_bar", "print_results", "print_times"],
        initialize: Dict[str, int] = {"grid": 4, "random": 2, "vertices": 4},
        constraints: List[callable] = None,
        pass_through: Dict = None,
        max_score: float = None,
        early_stopping: Dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with specified parameters.

        One ``Search`` is created, set up and appended to ``self.searches``
        per job. ``verbosity`` is also stored on the instance because ``run``
        reads it.

        Parameters:
        - experiment: Experiment object; its ``objective_function`` attribute supplies the default search id.
        - search_space: Dictionary defining the search space for optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None, i.e. the objective function's name).
        - n_jobs: Number of parallel jobs to run; -1 uses ``mp.cpu_count()`` (default: 1).
        - verbosity: List of verbosity options (default: ["progress_bar", "print_results", "print_times"]).
        - initialize: Dictionary specifying initialization parameters (default: {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None, treated as an empty list).
        - pass_through: Dictionary of additional parameters to pass through (default: None, treated as an empty dict).
        - max_score: Maximum score to achieve (default: None).
        - early_stopping: Dictionary specifying early stopping criteria (default: None, treated as an empty dict).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm start memory (default: None).
        """

        self.n_search += 1

        self.check_list(search_space)

        # The signature defaults are shared mutable objects. Work on copies so
        # that nothing downstream can alter them (or the caller's objects)
        # for later calls. Non-list / non-dict values pass through untouched.
        if isinstance(verbosity, list):
            verbosity = list(verbosity)
        if isinstance(initialize, dict):
            initialize = dict(initialize)

        constraints = constraints or []
        pass_through = pass_through or {}
        early_stopping = early_stopping or {}

        search_id = self._default_search_id(
            search_id, experiment.objective_function
        )
        s_space = SearchSpace(search_space)
        self.verbosity = verbosity

        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        for _ in range(n_jobs):
            search = Search(self.optimizer_class, self.opt_params)
            search.setup(
                experiment=experiment,
                s_space=s_space,
                n_iter=n_iter,
                initialize=initialize,
                constraints=constraints,
                pass_through=pass_through,
                max_score=max_score,
                early_stopping=early_stopping,
                random_state=random_state,
                memory=memory,
                memory_warm_start=memory_warm_start,
                verbosity=verbosity,
            )
            self.searches.append(search)

    @property
    def nth_search(self):
        """Number of optimizers held by the composite optimizer created in ``run``."""
        # The composite optimizer is stored as ``comp_opt`` everywhere else in
        # this class; the former ``composite_opt`` name was never assigned.
        return len(self.comp_opt.optimizers)

    def __add__(self, optimizer_instance):
        """Combine this optimizer with another one into a ``CompositeOptimizer``."""
        return CompositeOptimizer(self, optimizer_instance)

    def run(
        self,
        max_time=None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
    ):
        """
        Wrap this optimizer in a ``CompositeOptimizer`` and run it.

        The composite is stored as ``self.comp_opt``; the result accessors of
        this class read from it. ``self.verbosity`` is set by ``add_search``,
        so at least one search must have been added beforehand.

        Parameters:
        - max_time: Forwarded to ``CompositeOptimizer.run`` (default: None).
        - distribution: Forwarded to ``CompositeOptimizer.run`` (default: "multiprocessing").
        - n_processes: Forwarded to ``CompositeOptimizer.run`` (default: "auto").
        """
        self.comp_opt = CompositeOptimizer(self)
        self.comp_opt.run(max_time, distribution, n_processes, self.verbosity)

    def best_para(self, id_):
        """
        Retrieve the best parameters for a specific ID from the results.

        Requires ``run`` to have been called, since it creates ``self.comp_opt``.

        Parameters:
        - id_: Identifier forwarded unchanged to ``results_.best_para``.

        Returns:
        - Whatever ``self.comp_opt.results_.best_para(id_)`` returns.
        """

        return self.comp_opt.results_.best_para(id_)

    def best_score(self, id_):
        """
        Return the best score for a specific ID from the results.

        Requires ``run`` to have been called, since it creates ``self.comp_opt``.

        Parameters:
        - id_: Identifier forwarded unchanged to ``results_.best_score``.

        Returns:
        - Whatever ``self.comp_opt.results_.best_score(id_)`` returns.
        """

        return self.comp_opt.results_.best_score(id_)

    def search_data(self, id_, times=False):
        """
        Retrieve search data for a specific ID from the results.

        Requires ``run`` to have been called, since it creates ``self.comp_opt``.

        Parameters:
        - id_: Object whose ``objective_function`` attribute identifies the search data to retrieve.
        - times (bool, optional): Whether to keep the "eval_times" and "iter_times" columns. Defaults to False, which drops them if present.

        Returns:
        - pd.DataFrame: The search data for the specified ID.
        """

        search_data_ = self.comp_opt.results_.search_data(
            id_.objective_function
        )

        if not times:
            # Drop into a new frame rather than in place, so the frame handed
            # out by the results object is never modified by this accessor.
            search_data_ = search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                errors="ignore",
            )
        return search_data_
188