Passed
Pull Request — master (#560)
by Osma
03:28
created

annif.registry   A

Complexity

Total Complexity 22

Size/Duplication

Total Lines 131
Duplicated Lines 67.18 %

Importance

Changes 0
Metric Value
eloc 90
dl 88
loc 131
rs 10
c 0
b 0
f 0
wmc 22

4 Methods

Rating   Name   Duplication   Size   Complexity  
C AnnifRegistry._create_projects() 50 50 11
A AnnifRegistry.get_projects() 8 8 1
A AnnifRegistry.__init__() 7 7 3
A AnnifRegistry.get_project() 8 8 2

3 Functions

Rating   Name   Duplication   Size   Complexity  
A initialize_projects() 0 5 1
A get_projects() 0 8 2
A get_project() 0 8 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
"""Registry that keeps track of Annif projects"""
2
3
import collections
4
import configparser
5
import os.path
6
from flask import current_app
7
import tomli
8
import annif
9
import annif.util
10
from annif.exception import ConfigurationException
11
from annif.project import Access, AnnifProject
12
13
logger = annif.logger
14
15
16 View Code Duplication
class AnnifRegistry:
    """Class that keeps track of the Annif projects"""

    # Note: The individual projects are stored in a shared static variable,
    # keyed by the "registry ID" which is unique to the registry instance.
    # This is done to make it possible to serialize AnnifRegistry instances
    # without including the potentially huge project objects (which contain
    # backends with large models, vocabularies with lots of concepts etc).
    # Serialized AnnifRegistry instances can then be passed between
    # processes when using the multiprocessing module.
    _projects = {}

    def __init__(self, projects_file, datadir, init_projects):
        """Create the projects defined in projects_file and store them in
        the shared _projects mapping under this registry's ID. If
        init_projects is true, initialize() is called on every project."""

        self._rid = id(self)
        self._projects[self._rid] = \
            self._create_projects(projects_file, datadir)
        if init_projects:
            for project in self._projects[self._rid].values():
                project.initialize()

    @staticmethod
    def _locate_projects_file(projects_file):
        """Return the path of the project configuration file to read, or
        None (after logging a warning) if no such file exists. When
        projects_file is empty, "projects.cfg" and then "projects.toml" in
        the current directory are tried, in that order."""

        hint = ('You can set the path to the project configuration '
                'file using the ANNIF_PROJECTS environment '
                'variable or the command-line option "--projects".')

        if projects_file:
            if os.path.exists(projects_file):
                return projects_file
            logger.warning(
                f'Project configuration file "{projects_file}" is '
                'missing. Please provide one. ' + hint)
            return None

        for default_file in ('projects.cfg', 'projects.toml'):
            if os.path.exists(default_file):
                return default_file
        logger.warning(
            'Could not find project configuration file '
            '"projects.cfg" or "projects.toml". ' + hint)
        return None

    @staticmethod
    def _read_toml_config(projects_file):
        """Parse a TOML project configuration file and return the resulting
        mapping. Raises ConfigurationException if the file is not valid
        TOML."""

        # tomli.load() requires a file opened in binary mode
        with open(projects_file, "rb") as projf:
            try:
                return tomli.load(projf)
            except tomli.TOMLDecodeError as err:
                raise ConfigurationException(err) from err

    @staticmethod
    def _read_ini_config(projects_file):
        """Parse a classic INI style project configuration file and return
        the populated ConfigParser. Raises ConfigurationException if the
        file contains a duplicate section or option."""

        config = configparser.ConfigParser()
        # replace the default option name transform (lowercasing);
        # presumably annif.util.identity returns names unchanged - verify
        config.optionxform = annif.util.identity
        # the utf-8-sig codec skips a byte order mark at the start, if any
        with open(projects_file, encoding='utf-8-sig') as projf:
            try:
                config.read_file(projf)
            except (configparser.DuplicateOptionError,
                    configparser.DuplicateSectionError) as err:
                raise ConfigurationException(err) from err
        return config

    def _create_projects(self, projects_file, datadir):
        """Read the project configuration file and return an ordered dict
        of project_id -> AnnifProject. Returns an empty dict if no
        configuration file can be found. Raises ConfigurationException if
        the file cannot be parsed."""

        projects_file = self._locate_projects_file(projects_file)
        if projects_file is None:
            return {}

        if projects_file.endswith('.toml'):  # TOML format
            config = self._read_toml_config(projects_file)
            project_ids = config.keys()
        else:  # classic INI style format
            config = self._read_ini_config(projects_file)
            project_ids = config.sections()

        # create AnnifProject objects from the configuration file
        projects = collections.OrderedDict()
        for project_id in project_ids:
            projects[project_id] = AnnifProject(project_id,
                                                config[project_id],
                                                datadir,
                                                self)
        return projects

    def get_projects(self, min_access=Access.private):
        """Return the available projects as a dict of project_id ->
        AnnifProject. The min_access parameter may be used to set the minimum
        access level required for the returned projects."""

        return {project_id: project
                for project_id, project in self._projects[self._rid].items()
                if project.access >= min_access}

    def get_project(self, project_id, min_access=Access.private):
        """Return the definition of a single project by project_id. Raises
        ValueError if there is no project with that ID among the projects
        that satisfy min_access."""

        projects = self.get_projects(min_access)
        try:
            return projects[project_id]
        except KeyError:
            # the KeyError adds nothing to the message, so suppress it
            raise ValueError(
                "No such project {}".format(project_id)) from None
104
105
106
def initialize_projects(app):
    """Create an AnnifRegistry from the PROJECTS_FILE, DATADIR and
    INITIALIZE_PROJECTS settings of the given app and store it as
    app.annif_registry."""

    settings = app.config
    app.annif_registry = AnnifRegistry(
        settings['PROJECTS_FILE'],
        settings['DATADIR'],
        settings['INITIALIZE_PROJECTS'])
111
112
113
def get_projects(min_access=Access.private):
    """Return a dict of project_id -> AnnifProject from the registry of the
    current application, creating the registry first if the application
    does not have one yet. Only projects whose access level is at least
    min_access are included."""

    app = current_app
    registry_missing = not hasattr(app, 'annif_registry')
    if registry_missing:
        initialize_projects(app)

    registry = app.annif_registry
    return registry.get_projects(min_access)
121
122
123
def get_project(project_id, min_access=Access.private):
    """Return the definition of a single project by project_id, looked up
    among the projects of the current application that satisfy min_access.
    Raises ValueError if there is no such project."""

    projects = get_projects(min_access)
    try:
        return projects[project_id]
    except KeyError:
        # the KeyError adds nothing to the message, so suppress it
        raise ValueError("No such project {}".format(project_id)) from None
131