annif.backend.hyperopt.HPObjective._objective_wrapper() - Code Metrics - Inspection of "Merge pull request #873 from NatLibFi/issue688-hyp..." - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( da1836...1db6a8 )

by Osma

created 2025-08-18 10:00 UTC

HPObjective._objective_wrapper() A

↳ Parent: annif.backend.hyperopt

Complexity

Conditions

Size

Total Lines	3
Code Lines	3

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	3
nop	2
dl	0
loc	3
rs	10
c	0
b	0
f	0

"""Hyperparameter optimization functionality for backends"""

from __future__ import annotations

import abc
import collections
import tempfile
from typing import TYPE_CHECKING, Any, Callable

import optuna
import optuna.exceptions

import annif.parallel

from .backend import AnnifBackend

if TYPE_CHECKING:
    from click.utils import LazyFile
    from optuna.study.study import Study
    from optuna.trial import Trial

    from annif.corpus.document import DocumentCorpus

# Result of a hyperparameter optimization run: "lines" holds the recommended
# settings and "score" the score that was reached with them.
HPRecommendation = collections.namedtuple(
    "HPRecommendation",
    ["lines", "score"],
)


class TrialWriter:
    """Object that writes hyperparameter optimization trial results into a
    TSV file.

    The first call to write() emits a header line. Every call emits one row
    containing the trial number, the objective value and the normalized
    hyperparameter values."""

    def __init__(self, results_file: LazyFile, normalize_func: Callable) -> None:
        """Remember the output file and the normalization function.

        results_file is used as the file argument of print(). normalize_func
        is called with the raw hyperparameter dict of each trial and must
        return a dict."""
        self.results_file = results_file
        self.normalize_func = normalize_func
        self.header_written = False

    def write(self, trial_data: dict[str, Any]) -> None:
        """Write the results of one trial into the results file.  On the
        first run, write the header line first.

        trial_data must contain the keys "number", "value" and "params"."""

        # Normalize once and derive both the header and the row from the same
        # dict, so that the column names always line up with the values even
        # if the normalization reorders, renames or adds hyperparameters.
        params = self.normalize_func(trial_data["params"])

        if not self.header_written:
            print("\t".join(["trial", "value", *params]), file=self.results_file)
            self.header_written = True

        row = [trial_data["number"], trial_data["value"], *params.values()]
        print("\t".join(str(e) for e in row), file=self.results_file)


class HPObjective(annif.parallel.BaseWorker):
    """Common base for the objective functions used in hyperparameter
    optimization. All methods are class methods."""

    @classmethod
    def objective(cls, trial: Trial, args) -> float:
        """Compute the value to optimize for one trial. Subclasses must
        provide the implementation."""

        pass  # pragma: no cover

    @classmethod
    def _objective_wrapper(cls, trial: Trial) -> float:
        """Call objective() for the given trial, supplying cls.args as the
        second argument."""
        shared_args = cls.args
        return cls.objective(trial, shared_args)

    @classmethod
    def run_trial(
        cls, trial_id: int, storage_url: str, study_name: str
    ) -> dict[str, Any]:
        """Run exactly one trial of the named study loaded from storage_url.

        Returns a dict with the keys "number", "value" and "params" of the
        trial that was run. The trial_id argument is not used here."""

        # the finished trial is captured through a callback, to avoid race
        # conditions
        finished = []

        def remember_trial(study: Study, trial: Trial) -> None:
            finished.append(trial)

        loaded_study = optuna.load_study(study_name=study_name, storage=storage_url)
        loaded_study.optimize(
            cls._objective_wrapper, callbacks=[remember_trial], n_trials=1
        )

        result = finished[0]
        return dict(number=result.number, value=result.value, params=result.params)


class HyperparameterOptimizer:
    """Base class for hyperparameter optimizers"""

    def __init__(
        self,
        backend: AnnifBackend,
        corpus: DocumentCorpus,
        metric: str,
        objective: HPObjective,
    ) -> None:
        """Store the backend, corpus, metric and objective for later use by
        optimize() and by the hooks implemented in subclasses."""
        self._backend = backend
        self._corpus = corpus
        self._metric = metric
        self._objective = objective

    def _prepare(self, n_jobs: int = 1):
        """Prepare the optimizer for hyperparameter evaluation.  Up to
        n_jobs parallel threads or processes may be used during the
        operation. The return value will be passed to the objective function."""

        pass  # pragma: no cover

    @abc.abstractmethod
    def _postprocess(self, study: Study) -> HPRecommendation:
        """Convert the study results into hyperparameter recommendations"""
        pass  # pragma: no cover

    def _normalize(self, hps: dict[str, float]) -> dict[str, float]:
        """Normalize the given raw hyperparameters. Intended to be overridden
        by subclasses when necessary. The default is to keep them as-is."""
        return hps

    def optimize(
        self, n_trials: int, n_jobs: int, results_file: LazyFile | None
    ) -> HPRecommendation:
        """Find the optimal hyperparameters by testing up to the given number
        of hyperparameter combinations.

        The trials are submitted to a pool obtained from
        annif.parallel.get_pool(n_jobs) and share their state through a
        temporary SQLite database, which is removed again before returning.
        If results_file is given, the result of each completed trial is
        written to it as a TSV row. Returns the value of _postprocess()."""

        objective_args = self._prepare(n_jobs)
        self._objective.init(objective_args)

        writer = TrialWriter(results_file, self._normalize) if results_file else None
        write_callback = writer.write if writer else None

        # Keep the study database inside a private temporary directory so
        # that the database, and any side files SQLite may create next to it,
        # can be removed when the optimization is over. Only the path is
        # needed here; no file handle is kept open by this method.
        temp_dir = tempfile.TemporaryDirectory()
        try:
            storage_url = f"sqlite:///{temp_dir.name}/study.db"

            study = optuna.create_study(direction="maximize", storage=storage_url)

            jobs, pool_class = annif.parallel.get_pool(n_jobs)
            with pool_class(jobs) as pool:
                for i in range(n_trials):
                    pool.apply_async(
                        self._objective.run_trial,
                        args=(i, storage_url, study.study_name),
                        callback=write_callback,
                    )
                pool.close()
                pool.join()

            # the study must be read while its database still exists
            return self._postprocess(study)
        finally:
            try:
                temp_dir.cleanup()
            except OSError:
                # Deliberately best-effort: failing to delete the temporary
                # database (e.g. because it is still held open) must not
                # discard the optimization result.
                pass


class AnnifHyperoptBackend(AnnifBackend):
    """Base class for Annif backends that can perform hyperparameter
    optimization. Subclasses supply the optimizer through
    get_hp_optimizer()."""

    @abc.abstractmethod
    def get_hp_optimizer(
        self, corpus: DocumentCorpus, metric: str
    ) -> HyperparameterOptimizer:
        """Get a HyperparameterOptimizer object that can look for
        optimal hyperparameter combinations for the given corpus,
        measured using the given metric"""

        pass  # pragma: no cover


1			"""Hyperparameter optimization functionality for backends"""
2
3			from __future__ import annotations
4
5			import abc
6			import collections
7			import tempfile
8			from typing import TYPE_CHECKING, Any, Callable
9
10			import optuna
11			import optuna.exceptions
12
13			import annif.parallel
14
15			from .backend import AnnifBackend
16
17			if TYPE_CHECKING:
18			from click.utils import LazyFile
19			from optuna.study.study import Study
20			from optuna.trial import Trial
21
22			from annif.corpus.document import DocumentCorpus
23
24			HPRecommendation = collections.namedtuple("HPRecommendation", "lines score")
25
26
27			class TrialWriter:
28			"""Object that writes hyperparameter optimization trial results into a
29			TSV file."""
30
31			def __init__(self, results_file: LazyFile, normalize_func: Callable) -> None:
32			self.results_file = results_file
33			self.normalize_func = normalize_func
34			self.header_written = False
35
36			def write(self, trial_data: dict[str, Any]) -> None:
37			"""Write the results of one trial into the results file. On the
38			first run, write the header line first."""
39
40			if not self.header_written:
41			param_names = list(trial_data["params"].keys())
42			print("\t".join(["trial", "value"] + param_names), file=self.results_file)
43			self.header_written = True
44			print(
45			"\t".join(
46			(
47			str(e)
48			for e in [trial_data["number"], trial_data["value"]]
49			+ list(self.normalize_func(trial_data["params"]).values())
50			)
51			),
52			file=self.results_file,
53			)
54
55
56			class HPObjective(annif.parallel.BaseWorker):
57			"""Base class for hyperparameter optimizer objective functions"""
58
59			@classmethod
60			def objective(cls, trial: Trial, args) -> float:
61			"""Objective function to optimize. To be implemented by subclasses."""
62
63			pass # pragma: no cover
64
65			@classmethod
66			def _objective_wrapper(cls, trial: Trial) -> float:
67			return cls.objective(trial, cls.args)
68
69			@classmethod
70			def run_trial(
71			cls, trial_id: int, storage_url: str, study_name: str
72			) -> dict[str, Any]:
73
74			# use a callback to set the completed trial, to avoid race conditions
75			completed_trial = []
76
77			def set_trial_callback(study: Study, trial: Trial) -> None:
78			completed_trial.append(trial)
79
80			study = optuna.load_study(storage=storage_url, study_name=study_name)
81			study.optimize(
82			cls._objective_wrapper,
83			n_trials=1,
84			callbacks=[set_trial_callback],
85			)
86
87			return {
88			"number": completed_trial[0].number,
89			"value": completed_trial[0].value,
90			"params": completed_trial[0].params,
91			}
92
93
94			class HyperparameterOptimizer:
95			"""Base class for hyperparameter optimizers"""
96
97			def __init__(
98			self,
99			backend: AnnifBackend,
100			corpus: DocumentCorpus,
101			metric: str,
102			objective: HPObjective,
103			) -> None:
104			self._backend = backend
105			self._corpus = corpus
106			self._metric = metric
107			self._objective = objective
108
109			def _prepare(self, n_jobs: int = 1):
110			"""Prepare the optimizer for hyperparameter evaluation. Up to
111			n_jobs parallel threads or processes may be used during the
112			operation. The return value will be passed to the objective function."""
113
114			pass # pragma: no cover
115
116			@abc.abstractmethod
117			def _postprocess(self, study: Study) -> HPRecommendation:
118			"""Convert the study results into hyperparameter recommendations"""
119			pass # pragma: no cover
120
121			def _normalize(self, hps: dict[str, float]) -> dict[str, float]:
122			"""Normalize the given raw hyperparameters. Intended to be overridden
123			by subclasses when necessary. The default is to keep them as-is."""
124			return hps
125
126			def optimize(
127			self, n_trials: int, n_jobs: int, results_file: LazyFile | None
128			) -> HPRecommendation:
129			"""Find the optimal hyperparameters by testing up to the given number
130			of hyperparameter combinations"""
131
132			objective_args = self._prepare(n_jobs)
133			self._objective.init(objective_args)
134
135			writer = TrialWriter(results_file, self._normalize) if results_file else None
136			write_callback = writer.write if writer else None
137
138			temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
139			storage_url = f"sqlite:///{temp_db.name}"
140
141			study = optuna.create_study(direction="maximize", storage=storage_url)
142
143			jobs, pool_class = annif.parallel.get_pool(n_jobs)
144			with pool_class(jobs) as pool:
145			for i in range(n_trials):
146			pool.apply_async(
147			self._objective.run_trial,
148			args=(i, storage_url, study.study_name),
149			callback=write_callback,
150			)
151			pool.close()
152			pool.join()
153
154			return self._postprocess(study)
155
156
157			class AnnifHyperoptBackend(AnnifBackend):
158			"""Base class for Annif backends that can perform hyperparameter
159			optimization"""
160
161			@abc.abstractmethod
162			def get_hp_optimizer(self, corpus: DocumentCorpus, metric: str):
163			"""Get a HyperparameterOptimizer object that can look for
164			optimal hyperparameter combinations for the given corpus,
165			measured using the given metric"""
166
167			pass # pragma: no cover
168

NatLibFi / Annif

Push — main ( da1836...1db6a8 )

HPObjective._objective_wrapper() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like