| @@ 20-259 (lines=240) @@ | ||
| 17 | from .search_space import SearchSpace |
|
| 18 | ||
| 19 | ||
class Hyperactive:
    """
    Manage and execute one or more optimization search processes.

    Parameters:
    - verbosity: List of verbosity levels, or False to disable all output
      (default: ["progress_bar", "print_results", "print_times"])
    - distribution: String indicating the distribution method (default: "multiprocessing")
    - n_processes: Number of processes to run in parallel or "auto" to determine automatically (default: "auto")

    Methods:
    - add_search: Add a new optimization search process with specified parameters
    - run: Execute the optimization searches
    - best_para: Get the best parameters for a specific search
    - best_score: Get the best score for a specific search
    - search_data: Get the search data for a specific search
    """

    def __init__(
        self,
        verbosity: Union[list, bool] = None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
    ):
        super().__init__()
        # None is a sentinel replacing the former mutable default list, so the
        # default is never shared between instances (mutable-default pitfall).
        if verbosity is None:
            verbosity = ["progress_bar", "print_results", "print_times"]
        elif verbosity is False:
            verbosity = []

        self.verbosity = verbosity
        self.distribution = distribution
        self.n_processes = n_processes

        # Maps nth_process (int) -> optimizer instance; one entry per job.
        self.opt_pros = {}

    def _create_shared_memory(self):
        """Give optimizers that share an objective function one shared memory
        dict (applies only to processes configured with memory == "share")."""
        _bundle_opt_processes = {}

        # Group share-memory optimizers by objective-function name.
        for opt_pros in self.opt_pros.values():
            if opt_pros.memory != "share":
                continue
            name = opt_pros.objective_function.__name__
            _bundle_opt_processes.setdefault(name, []).append(opt_pros)

        for opt_pros_l in _bundle_opt_processes.values():
            # Check if the lengths of the search spaces of all optimizers
            # in the list are the same.
            if len(set(len(opt_pros.s_space()) for opt_pros in opt_pros_l)) == 1:
                manager = mp.Manager()  # get new manager.dict
                shared_memory = manager.dict()
                for opt_pros in opt_pros_l:
                    opt_pros.memory = shared_memory
            else:
                # NOTE(review): at this point opt_pros_l[0].memory is still the
                # string "share", not a manager dict — confirm this branch is
                # intended before relying on it.
                for opt_pros in opt_pros_l:
                    opt_pros.memory = opt_pros_l[0].memory  # get same manager.dict

    @staticmethod
    def _default_opt(optimizer):
        """Resolve the "default" optimizer string and return a deep copy so
        each search gets an independent optimizer instance."""
        if isinstance(optimizer, str):
            if optimizer == "default":
                optimizer = RandomSearchOptimizer()
        return copy.deepcopy(optimizer)

    @staticmethod
    def _default_search_id(search_id, objective_function):
        """Fall back to the objective function's name when no search id is given."""
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        """Warn about search-space values that are not of type list."""
        for key, search_dim in search_space.items():
            error_msg = (
                "Value in '{}' of search space dictionary "
                "must be of type list".format(key)
            )
            if not isinstance(search_dim, list):
                print("Warning", error_msg)
                # raise ValueError(error_msg)

    def add_search(
        self,
        objective_function: callable,
        search_space: Dict[str, list],
        n_iter: int,
        search_id=None,
        optimizer: "Union[str, Type[RandomSearchOptimizer]]" = "default",
        n_jobs: int = 1,
        initialize: Dict[str, int] = None,
        constraints: List[callable] = None,
        pass_through: Dict = None,
        callbacks: Dict[str, callable] = None,
        catch: Dict = None,
        max_score: float = None,
        early_stopping: Dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with specified parameters.

        Parameters:
        - objective_function: The objective function to optimize.
        - search_space: Dictionary defining the search space for optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None).
        - optimizer: The optimizer to use for the search process (default: "default").
        - n_jobs: Number of parallel jobs to run; -1 uses all CPUs (default: 1).
        - initialize: Dictionary specifying initialization parameters
          (default: {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None).
        - pass_through: Dictionary of additional parameters to pass through (default: None).
        - callbacks: Dictionary of callback functions (default: None).
        - catch: Dictionary of exceptions to catch during optimization (default: None).
        - max_score: Maximum score to achieve (default: None).
        - early_stopping: Dictionary specifying early stopping criteria (default: None).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm start memory (default: None).
        """

        self.check_list(search_space)

        # None sentinels replace mutable default arguments (B006). An
        # explicitly passed empty dict is preserved for `initialize`.
        if initialize is None:
            initialize = {"grid": 4, "random": 2, "vertices": 4}
        constraints = constraints or []
        pass_through = pass_through or {}
        callbacks = callbacks or {}
        catch = catch or {}
        early_stopping = early_stopping or {}

        optimizer = self._default_opt(optimizer)
        search_id = self._default_search_id(search_id, objective_function)
        s_space = SearchSpace(search_space)

        optimizer.setup_search(
            objective_function=objective_function,
            s_space=s_space,
            n_iter=n_iter,
            initialize=initialize,
            constraints=constraints,
            pass_through=pass_through,
            callbacks=callbacks,
            catch=catch,
            max_score=max_score,
            early_stopping=early_stopping,
            random_state=random_state,
            memory=memory,
            memory_warm_start=memory_warm_start,
            verbosity=self.verbosity,
        )

        # -1 means "use every available core".
        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        # Register the same optimizer instance once per job slot.
        for _ in range(n_jobs):
            nth_process = len(self.opt_pros)
            self.opt_pros[nth_process] = optimizer

    def _print_info(self):
        """Print per-process results according to the verbosity settings."""
        print_res = PrintResults(self.opt_pros, self.verbosity)

        if self.verbosity:
            for _ in range(len(self.opt_pros)):
                print("")

        for results in self.results_list:
            nth_process = results["nth_process"]
            print_res.print_process(results, nth_process)

    def run(self, max_time: float = None):
        """
        Run the optimization process with an optional maximum time limit.

        Args:
            max_time (float, optional): Maximum time limit for the
                optimization process. Defaults to None.
        """

        self._create_shared_memory()

        for opt in self.opt_pros.values():
            opt.max_time = max_time

        self.results_list = run_search(
            self.opt_pros, self.distribution, self.n_processes
        )

        self.results_ = Results(self.results_list, self.opt_pros)

        self._print_info()

    def best_para(self, id_):
        """
        Retrieve the best parameters for a specific ID from the results.

        Parameters:
        - id_ (int): The ID of the parameters to retrieve.

        Returns:
        - Union[Dict[str, Union[int, float]], None]: The best parameters for
          the specified ID if found, otherwise None.

        Raises:
        - ValueError: If the objective function name is not recognized.
        """

        return self.results_.best_para(id_)

    def best_score(self, id_):
        """
        Return the best score for a specific ID from the results.

        Parameters:
        - id_ (int): The ID for which the best score is requested.
        """

        return self.results_.best_score(id_)

    def search_data(self, id_, times=False):
        """
        Retrieve search data for a specific ID from the results. Timing
        columns are dropped unless `times` is truthy.

        Parameters:
        - id_ (int): The ID of the search data to retrieve.
        - times (bool, optional): Whether to keep evaluation and iteration
          time columns. Defaults to False.

        Returns:
        - pd.DataFrame: The search data for the specified ID.
        """

        search_data_ = self.results_.search_data(id_)

        # Idiomatic falsy check instead of the former `times == False`.
        if not times:
            search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                inplace=True,
                errors="ignore",
            )
        return search_data_
|
| 260 | ||
| @@ 20-259 (lines=240) @@ | ||
| 17 | from .search_space import SearchSpace |
|
| 18 | ||
| 19 | ||
class Hyperactive:
    """
    Manage and execute one or more optimization search processes.

    Parameters:
    - verbosity: List of verbosity levels, or False to disable all output
      (default: ["progress_bar", "print_results", "print_times"])
    - distribution: String indicating the distribution method (default: "multiprocessing")
    - n_processes: Number of processes to run in parallel or "auto" to determine automatically (default: "auto")

    Methods:
    - add_search: Add a new optimization search process with specified parameters
    - run: Execute the optimization searches
    - best_para: Get the best parameters for a specific search
    - best_score: Get the best score for a specific search
    - search_data: Get the search data for a specific search
    """

    def __init__(
        self,
        verbosity: Union[list, bool] = None,
        distribution: str = "multiprocessing",
        n_processes: Union[str, int] = "auto",
    ):
        super().__init__()
        # None is a sentinel replacing the former mutable default list, so the
        # default is never shared between instances (mutable-default pitfall).
        if verbosity is None:
            verbosity = ["progress_bar", "print_results", "print_times"]
        elif verbosity is False:
            verbosity = []

        self.verbosity = verbosity
        self.distribution = distribution
        self.n_processes = n_processes

        # Maps nth_process (int) -> optimizer instance; one entry per job.
        self.opt_pros = {}

    def _create_shared_memory(self):
        """Give optimizers that share an objective function one shared memory
        dict (applies only to processes configured with memory == "share")."""
        _bundle_opt_processes = {}

        # Group share-memory optimizers by objective-function name.
        for opt_pros in self.opt_pros.values():
            if opt_pros.memory != "share":
                continue
            name = opt_pros.objective_function.__name__
            _bundle_opt_processes.setdefault(name, []).append(opt_pros)

        for opt_pros_l in _bundle_opt_processes.values():
            # Check if the lengths of the search spaces of all optimizers
            # in the list are the same.
            if len(set(len(opt_pros.s_space()) for opt_pros in opt_pros_l)) == 1:
                manager = mp.Manager()  # get new manager.dict
                shared_memory = manager.dict()
                for opt_pros in opt_pros_l:
                    opt_pros.memory = shared_memory
            else:
                # NOTE(review): at this point opt_pros_l[0].memory is still the
                # string "share", not a manager dict — confirm this branch is
                # intended before relying on it.
                for opt_pros in opt_pros_l:
                    opt_pros.memory = opt_pros_l[0].memory  # get same manager.dict

    @staticmethod
    def _default_opt(optimizer):
        """Resolve the "default" optimizer string and return a deep copy so
        each search gets an independent optimizer instance."""
        if isinstance(optimizer, str):
            if optimizer == "default":
                optimizer = RandomSearchOptimizer()
        return copy.deepcopy(optimizer)

    @staticmethod
    def _default_search_id(search_id, objective_function):
        """Fall back to the objective function's name when no search id is given."""
        if not search_id:
            search_id = objective_function.__name__
        return search_id

    @staticmethod
    def check_list(search_space):
        """Warn about search-space values that are not of type list."""
        for key, search_dim in search_space.items():
            error_msg = (
                "Value in '{}' of search space dictionary "
                "must be of type list".format(key)
            )
            if not isinstance(search_dim, list):
                print("Warning", error_msg)
                # raise ValueError(error_msg)

    def add_search(
        self,
        objective_function: callable,
        search_space: Dict[str, list],
        n_iter: int,
        search_id=None,
        optimizer: "Union[str, Type[RandomSearchOptimizer]]" = "default",
        n_jobs: int = 1,
        initialize: Dict[str, int] = None,
        constraints: List[callable] = None,
        pass_through: Dict = None,
        callbacks: Dict[str, callable] = None,
        catch: Dict = None,
        max_score: float = None,
        early_stopping: Dict = None,
        random_state: int = None,
        memory: Union[str, bool] = "share",
        memory_warm_start: pd.DataFrame = None,
    ):
        """
        Add a new optimization search process with specified parameters.

        Parameters:
        - objective_function: The objective function to optimize.
        - search_space: Dictionary defining the search space for optimization.
        - n_iter: Number of iterations for the optimization process.
        - search_id: Identifier for the search process (default: None).
        - optimizer: The optimizer to use for the search process (default: "default").
        - n_jobs: Number of parallel jobs to run; -1 uses all CPUs (default: 1).
        - initialize: Dictionary specifying initialization parameters
          (default: {"grid": 4, "random": 2, "vertices": 4}).
        - constraints: List of constraint functions (default: None).
        - pass_through: Dictionary of additional parameters to pass through (default: None).
        - callbacks: Dictionary of callback functions (default: None).
        - catch: Dictionary of exceptions to catch during optimization (default: None).
        - max_score: Maximum score to achieve (default: None).
        - early_stopping: Dictionary specifying early stopping criteria (default: None).
        - random_state: Seed for random number generation (default: None).
        - memory: Option to share memory between processes (default: "share").
        - memory_warm_start: DataFrame containing warm start memory (default: None).
        """

        self.check_list(search_space)

        # None sentinels replace mutable default arguments (B006). An
        # explicitly passed empty dict is preserved for `initialize`.
        if initialize is None:
            initialize = {"grid": 4, "random": 2, "vertices": 4}
        constraints = constraints or []
        pass_through = pass_through or {}
        callbacks = callbacks or {}
        catch = catch or {}
        early_stopping = early_stopping or {}

        optimizer = self._default_opt(optimizer)
        search_id = self._default_search_id(search_id, objective_function)
        s_space = SearchSpace(search_space)

        optimizer.setup_search(
            objective_function=objective_function,
            s_space=s_space,
            n_iter=n_iter,
            initialize=initialize,
            constraints=constraints,
            pass_through=pass_through,
            callbacks=callbacks,
            catch=catch,
            max_score=max_score,
            early_stopping=early_stopping,
            random_state=random_state,
            memory=memory,
            memory_warm_start=memory_warm_start,
            verbosity=self.verbosity,
        )

        # -1 means "use every available core".
        n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        # Register the same optimizer instance once per job slot.
        for _ in range(n_jobs):
            nth_process = len(self.opt_pros)
            self.opt_pros[nth_process] = optimizer

    def _print_info(self):
        """Print per-process results according to the verbosity settings."""
        print_res = PrintResults(self.opt_pros, self.verbosity)

        if self.verbosity:
            for _ in range(len(self.opt_pros)):
                print("")

        for results in self.results_list:
            nth_process = results["nth_process"]
            print_res.print_process(results, nth_process)

    def run(self, max_time: float = None):
        """
        Run the optimization process with an optional maximum time limit.

        Args:
            max_time (float, optional): Maximum time limit for the
                optimization process. Defaults to None.
        """

        self._create_shared_memory()

        for opt in self.opt_pros.values():
            opt.max_time = max_time

        self.results_list = run_search(
            self.opt_pros, self.distribution, self.n_processes
        )

        self.results_ = Results(self.results_list, self.opt_pros)

        self._print_info()

    def best_para(self, id_):
        """
        Retrieve the best parameters for a specific ID from the results.

        Parameters:
        - id_ (int): The ID of the parameters to retrieve.

        Returns:
        - Union[Dict[str, Union[int, float]], None]: The best parameters for
          the specified ID if found, otherwise None.

        Raises:
        - ValueError: If the objective function name is not recognized.
        """

        return self.results_.best_para(id_)

    def best_score(self, id_):
        """
        Return the best score for a specific ID from the results.

        Parameters:
        - id_ (int): The ID for which the best score is requested.
        """

        return self.results_.best_score(id_)

    def search_data(self, id_, times=False):
        """
        Retrieve search data for a specific ID from the results. Timing
        columns are dropped unless `times` is truthy.

        Parameters:
        - id_ (int): The ID of the search data to retrieve.
        - times (bool, optional): Whether to keep evaluation and iteration
          time columns. Defaults to False.

        Returns:
        - pd.DataFrame: The search data for the specified ID.
        """

        search_data_ = self.results_.search_data(id_)

        # Idiomatic falsy check instead of the former `times == False`.
        if not times:
            search_data_.drop(
                labels=["eval_times", "iter_times"],
                axis=1,
                inplace=True,
                errors="ignore",
            )
        return search_data_
|
| 260 | ||