Passed
Pull Request — master (#110)
by
unknown
01:35
created

BaseOptimizer.add_search()   B

Complexity

Conditions 3

Size

Total Lines 66
Code Lines 38

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 38
nop 14
dl 0
loc 66
rs 8.968
c 0
b 0
f 0

How to fix   Long Method    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

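Commonly applied refactorings include Extract Method; if the method also has many parameters or temporary variables, consider Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.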

Many Parameters

Methods with many parameters are not only hard to understand, but their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists, such as Replace Parameter with Method Call, Introduce Parameter Object, and Preserve Whole Object.

1
"""Base class for optimizer."""
2
3
from typing import Union, List, Dict
4
import multiprocessing as mp
5
import pandas as pd
6
7
from ..optimizers.search_space import SearchSpace
8
from ..optimizers._search import Search
9
10
11
from ._composite_optimizer import CompositeOptimizer
12
13
from skbase.base import BaseObject
14
15
16
class BaseOptimizer(BaseObject):
    """Base class for optimizers that collect and run search processes.

    An instance stores an optimizer class together with its parameters.
    Searches are registered with ``add_search`` and executed with ``run``,
    which delegates to a ``CompositeOptimizer``. The result accessors
    (``best_para``, ``best_score``, ``search_data``) read from the composite
    optimizer created by ``run`` and therefore require ``run`` to have been
    called first.
    """

    n_search: int
    searches: list
    # NOTE(review): ``opt_pros`` is never assigned anywhere in this class;
    # possibly a typo for ``opt_params`` -- confirm before relying on it.
    opt_pros: dict

    def __init__(self, optimizer_class, opt_params):
        """
        Store the optimizer class and its parameters.

        Parameters:
        - optimizer_class: Optimizer class handed to every ``Search`` created by ``add_search``.
        - opt_params: Optimizer parameters handed to every ``Search`` created by ``add_search``.
        """
        super().__init__()

        self.optimizer_class = optimizer_class
        self.opt_params = opt_params

        self.n_search = 0
        self.searches = []

    @staticmethod
    def _default_search_id(search_id, objective_function):
        """Return ``search_id``, or the objective function's name if it is falsy."""
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        """
        Print a warning for every search-space entry that is not a list.

        Parameters:
        - search_space: Dictionary mapping dimension names to their candidate values.
        """
        for key, search_dim in search_space.items():
            if isinstance(search_dim, list):
                continue

            error_msg = (
                "Value in '{}' of search space dictionary must be of type list".format(
                    key
                )
            )
            # Deliberately a warning only: non-list values are reported but
            # do not abort adding the search.
            print("Warning", error_msg)

    def add_search(
        self,
        experiment: callable,
        search_space: Dict[str, list],
        n_iter: int,
        search_id=None,
        n_jobs: int = 1,
        initialize: Dict[str, int] = None,
        constraints: List[callable] = None,
        pass_through: Dict = None,
        max_score: float = None,
        early_stopping: Dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with specified parameters.

        One ``Search`` object is created and appended to ``self.searches``
        per job; all of them share the same ``SearchSpace`` instance.

        Parameters:
        - experiment: Experiment object containing the objective-function to optimize (its ``objective_function`` attribute is read).
        - search_space: Dictionary defining the search space for optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None, which falls back to the objective function's name).
        - n_jobs: Number of parallel jobs to run; -1 uses ``multiprocessing.cpu_count()`` (default: 1).
        - initialize: Dictionary specifying initialization parameters (default: None, which means {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None, treated as an empty list).
        - pass_through: Dictionary of additional parameters to pass through (default: None, treated as an empty dict).
        - max_score: Maximum score to achieve (default: None).
        - early_stopping: Dictionary specifying early stopping criteria (default: None, treated as an empty dict).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm start memory (default: None).
        """

        self.n_search += 1

        self.check_list(search_space)

        # Build the default per call so that no dictionary object is shared
        # between separate add_search() invocations.
        if initialize is None:
            initialize = {"grid": 4, "random": 2, "vertices": 4}
        constraints = constraints or []
        pass_through = pass_through or {}
        early_stopping = early_stopping or {}

        search_id = self._default_search_id(search_id, experiment.objective_function)
        s_space = SearchSpace(search_space)

        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        for _ in range(n_jobs):
            search = Search(self.optimizer_class, self.opt_params)
            search.setup(
                experiment=experiment,
                s_space=s_space,
                n_iter=n_iter,
                initialize=initialize,
                constraints=constraints,
                pass_through=pass_through,
                max_score=max_score,
                early_stopping=early_stopping,
                random_state=random_state,
                memory=memory,
                memory_warm_start=memory_warm_start,
            )
            self.searches.append(search)

    @property
    def nth_search(self):
        """Number of optimizers held by the composite optimizer created in ``run``."""
        # ``run`` stores the composite optimizer as ``comp_opt``; no attribute
        # named ``composite_opt`` is ever set by this class.
        return len(self.comp_opt.optimizers)

    def __add__(self, optimizer_instance):
        """Combine this optimizer with another one into a ``CompositeOptimizer``."""
        return CompositeOptimizer(self, optimizer_instance)

    def run(
        self,
        max_time=None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
        verbosity: list = None,
    ):
        """
        Run all added searches through a freshly created ``CompositeOptimizer``.

        The composite optimizer is stored as ``self.comp_opt``; the result
        accessors of this class read from it.

        Parameters:
        - max_time: Forwarded to ``CompositeOptimizer.run`` (default: None).
        - distribution: Forwarded to ``CompositeOptimizer.run`` (default: "multiprocessing").
        - n_processes: Forwarded to ``CompositeOptimizer.run`` (default: "auto").
        - verbosity: Forwarded to ``CompositeOptimizer.run`` (default: None, which means ["progress_bar", "print_results", "print_times"]).
        """
        # Only ``None`` selects the default; an explicitly passed empty list
        # is forwarded unchanged.
        if verbosity is None:
            verbosity = ["progress_bar", "print_results", "print_times"]

        self.comp_opt = CompositeOptimizer(self)
        self.comp_opt.run(max_time, distribution, n_processes, verbosity)

    def best_para(self, experiment):
        """
        Retrieve the best parameters for an experiment from the results.

        Requires ``run`` to have been called.

        Parameters:
        - experiment: The experiment of the optimization run; its ``objective_function`` attribute identifies the results.

        Returns:
        - Whatever ``results_.best_para`` returns for the experiment's objective function (presumably the best parameter dictionary -- confirm against the results class).
        """

        return self.comp_opt.results_.best_para(experiment.objective_function)

    def best_score(self, experiment):
        """
        Return the best score for an experiment from the results.

        Requires ``run`` to have been called.

        Parameters:
        - experiment: The experiment of the optimization run; its ``objective_function`` attribute identifies the results.
        """

        return self.comp_opt.results_.best_score(experiment.objective_function)

    def search_data(self, experiment, times=False):
        """
        Retrieve search data for an experiment from the results.

        Requires ``run`` to have been called. Unless 'times' is truthy, the
        "eval_times" and "iter_times" columns are left out of the returned data.

        Parameters:
        - experiment: The experiment of the optimization run; its ``objective_function`` attribute identifies the results.
        - times (bool, optional): Whether to keep evaluation and iteration times in the returned data. Defaults to False.

        Returns:
        - pd.DataFrame: The search data for the specified experiment.
        """

        search_data_ = self.comp_opt.results_.search_data(experiment.objective_function)

        if not times:
            # Drop on a copy rather than in place so the frame held by the
            # results object keeps its time columns for later calls.
            search_data_ = search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                errors="ignore",
            )
        return search_data_
184