annif.backend.hyperopt - Code Metrics - Inspection of "Implement hyperparameter optimization of ensemble..." - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#414)

by Osma

created 2020-07-21 12:25 UTC

annif.backend.hyperopt A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	103
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	11
eloc	57
dl	0
loc	103
rs	10
c	0
b	0
f	0

9 Methods

Rating	Name	Size	Complexity
A	HyperparameterOptimizer.optimize()	22	2
A	HyperparameterOptimizer._prepare()	6	1
A	TrialWriter.__init__()	4	1
A	HyperparameterOptimizer._normalize()	4	1
A	AnnifHyperoptBackend.get_hp_optimizer()	7	1
A	TrialWriter.write()	12	2
A	HyperparameterOptimizer._objective()	4	1
A	HyperparameterOptimizer.__init__()	4	1
A	HyperparameterOptimizer._postprocess()	4	1

"""Hyperparameter optimization functionality for backends"""

import abc
import collections
import warnings
import optuna
import optuna.exceptions
from .backend import AnnifBackend
from annif import logger


HPRecommendation = collections.namedtuple('HPRecommendation', 'lines score')


class TrialWriter:
    """Object that writes hyperparameter optimization trial results into a
    TSV file.

    One tab-separated row is written per trial: the trial number, the trial
    value, and then the normalized hyperparameter values. A header row is
    written once, before the first data row."""

    def __init__(self, results_file, normalize_func):
        """Create a writer.

        results_file -- writable text file object that receives the rows
        normalize_func -- callable that takes the parameter mapping of a
            trial and returns the mapping whose keys and values are
            written out"""
        self.results_file = results_file
        self.normalize_func = normalize_func
        self.header_written = False

    def write(self, study, trial):
        """Write the results of one trial into the results file.  On the
        first run, write the header line first.

        The study argument is accepted but not used; only trial.number,
        trial.value and trial.params are read."""

        # Normalize once and build both the header and the data row from
        # the same mapping, so that the column names always line up with
        # the values even if the normalizer reorders or renames keys.
        params = self.normalize_func(trial.params)

        if not self.header_written:
            print('\t'.join(['trial', 'value'] + list(params.keys())),
                  file=self.results_file)
            self.header_written = True

        row = [trial.number, trial.value] + list(params.values())
        print('\t'.join(str(e) for e in row), file=self.results_file)


class HyperparameterOptimizer:
    """Base class for hyperparameter optimizers.

    Subclasses provide the objective function and the conversion of the
    finished study into recommendations; optimize() runs the Optuna study
    and ties the pieces together."""

    # NOTE: this class does not use an abc metaclass, so the
    # @abc.abstractmethod markers below are not enforced at instantiation
    # time; they document which hooks subclasses are expected to override.

    def __init__(self, backend, corpus, metric):
        """Store the backend, corpus and metric on the instance."""
        self._backend = backend
        self._corpus = corpus
        self._metric = metric

    def _prepare(self, n_jobs=1):
        """Prepare the optimizer for hyperparameter evaluation.  Up to
        n_jobs parallel threads or processes may be used during the
        operation."""

        pass  # pragma: no cover

    @abc.abstractmethod
    def _objective(self, trial):
        """Objective function to optimize"""
        pass  # pragma: no cover

    @abc.abstractmethod
    def _postprocess(self, study):
        """Convert the study results into hyperparameter recommendations"""
        pass  # pragma: no cover

    def _normalize(self, hps):
        """Normalize the given raw hyperparameters. Intended to be overridden
        by subclasses when necessary. The default is to keep them as-is."""
        return hps

    def optimize(self, n_trials, n_jobs, results_file):
        """Find the optimal hyperparameters by testing up to the given number
        of hyperparameter combinations.

        n_trials -- number of trials passed on to the Optuna study
        n_jobs -- number of parallel jobs, passed to _prepare() and to the
            Optuna study; the progress bar is shown only when this is 1
        results_file -- if truthy, a writable file object that receives one
            TSV row per trial (via TrialWriter)

        Returns whatever _postprocess() returns for the finished study."""

        self._prepare(n_jobs)

        callbacks = []
        if results_file:
            callbacks.append(TrialWriter(results_file, self._normalize).write)

        study = optuna.create_study(direction='maximize')
        # silence the ExperimentalWarning when using the Optuna progress bar;
        # catch_warnings() restores the previous warning filters on exit so
        # the suppression does not leak into the rest of the process
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                category=optuna.exceptions.ExperimentalWarning)
            study.optimize(self._objective,
                           n_trials=n_trials,
                           n_jobs=n_jobs,
                           callbacks=callbacks,
                           gc_after_trial=False,
                           show_progress_bar=(n_jobs == 1))
            return self._postprocess(study)


class AnnifHyperoptBackend(AnnifBackend):
    """Common base class for Annif backends whose hyperparameters can be
    optimized"""

    @abc.abstractmethod
    def get_hp_optimizer(self, corpus, metric):
        """Return a HyperparameterOptimizer object that searches for the
        best hyperparameter combination on the given corpus, scored with
        the given metric"""

        return None  # pragma: no cover


1			"""Hyperparameter optimization functionality for backends"""
2
3			import abc
4			import collections
5			import warnings
6			import optuna
7			import optuna.exceptions
8			from .backend import AnnifBackend
9			from annif import logger
10
11
12			HPRecommendation = collections.namedtuple('HPRecommendation', 'lines score')
13
14
15			class TrialWriter:
16			"""Object that writes hyperparameter optimization trial results into a
17			TSV file."""
18
19			def __init__(self, results_file, normalize_func):
20			self.results_file = results_file
21			self.normalize_func = normalize_func
22			self.header_written = False
23
24			def write(self, study, trial):
25			"""Write the results of one trial into the results file. On the
26			first run, write the header line first."""
27
28			if not self.header_written:
29			param_names = list(trial.params.keys())
30			print('\t'.join(['trial', 'value'] + param_names),
31			file=self.results_file)
32			self.header_written = True
33			print('\t'.join((str(e) for e in [trial.number, trial.value] +
34			list(self.normalize_func(trial.params).values()))),
35			file=self.results_file)
36
37
38			class HyperparameterOptimizer:
39			"""Base class for hyperparameter optimizers"""
40
41			def __init__(self, backend, corpus, metric):
42			self._backend = backend
43			self._corpus = corpus
44			self._metric = metric
45
46			def _prepare(self, n_jobs=1):
47			"""Prepare the optimizer for hyperparameter evaluation. Up to
48			n_jobs parallel threads or processes may be used during the
49			operation."""
50
51			pass # pragma: no cover
52
53			@abc.abstractmethod
54			def _objective(self, trial):
55			"""Objective function to optimize"""
56			pass # pragma: no cover
57
58			@abc.abstractmethod
59			def _postprocess(self, study):
60			"""Convert the study results into hyperparameter recommendations"""
61			pass # pragma: no cover
62
63			def _normalize(self, hps):
64			"""Normalize the given raw hyperparameters. Intended to be overridden
65			by subclasses when necessary. The default is to keep them as-is."""
66			return hps
67
68			def optimize(self, n_trials, n_jobs, results_file):
69			"""Find the optimal hyperparameters by testing up to the given number
70			of hyperparameter combinations"""
71
72			self._prepare(n_jobs)
73
74			if results_file:
75			callbacks = [TrialWriter(results_file, self._normalize).write]
76			else:
77			callbacks = []
78
79			study = optuna.create_study(direction='maximize')
80			# silence the ExperimentalWarning when using the Optuna progress bar
81			warnings.filterwarnings("ignore",
82			category=optuna.exceptions.ExperimentalWarning)
83			study.optimize(self._objective,
84			n_trials=n_trials,
85			n_jobs=n_jobs,
86			callbacks=callbacks,
87			gc_after_trial=False,
88			show_progress_bar=(n_jobs == 1))
89			return self._postprocess(study)
90
91
92			class AnnifHyperoptBackend(AnnifBackend):
93			"""Base class for Annif backends that can perform hyperparameter
94			optimization"""
95
96			@abc.abstractmethod
97			def get_hp_optimizer(self, corpus, metric):
98			"""Get a HyperparameterOptimizer object that can look for
99			optimal hyperparameter combinations for the given corpus,
100			measured using the given metric"""
101
102			pass # pragma: no cover
103

NatLibFi / Annif

Pull Request — master (#414)

annif.backend.hyperopt A

Complexity

Size/Duplication

Importance

9 Methods

Duplication Side-by-Side

Filter issues like