Completed
Branch master (db5e7a)
by Osma
09:07 queued 05:09
created

AnnifProject   A

Complexity

Total Complexity 22

Size/Duplication

Total Lines 85
Duplicated Lines 0 %

Importance

Changes 7
Bugs 1 Features 0
Metric Value
c 7
b 1
f 0
dl 0
loc 85
rs 10
wmc 22

8 Methods

Rating   Name   Duplication   Size   Complexity  
A _initialize_backends() 0 12 3
A __init__() 0 4 1
A _merge_hits() 0 9 3
B _analyze_with_backends() 0 14 6
A analyze() 0 12 1
A dump() 0 6 2
A load_subjects() 0 6 2
A _filter_hits() 0 12 4
1
"""Project management functionality for Annif"""
2
3
import collections
4
import configparser
5
import logging
0 ignored issues
show
Unused Code introduced by
The import logging seems to be unused.
Loading history...
6
from flask import current_app
7
import annif
8
import annif.hit
9
import annif.backend
10
from annif import logger
11
12
13
class AnnifProject:
    """Class representing the configuration of a single Annif project.

    Instances carry three attributes set by the constructor:
    project_id, language and backends (a list of (backend, weight)
    tuples)."""

    def __init__(self, project_id, config):
        """Set up the project from its id and a configuration mapping that
        provides the 'language' and 'backends' keys."""
        self.project_id = project_id
        self.language = config['language']
        self.backends = self._initialize_backends(config['backends'])

    def _initialize_backends(self, backends_configuration):
        """Parse a backend specification string into a list of
        (backend, weight) tuples.

        The string is a comma-separated list of entries, each either
        "backend_id" or "backend_id:weight". The weight is converted with
        float() and defaults to 1.0 when omitted. Backend objects are
        looked up with annif.backend.get_backend()."""
        backends = []
        for backenddef in backends_configuration.split(','):
            bedefs = backenddef.strip().split(':')
            backend_id = bedefs[0]
            # anything after a second ':' is ignored
            weight = float(bedefs[1]) if len(bedefs) > 1 else 1.0
            backend = annif.backend.get_backend(backend_id)
            backends.append((backend, weight))
        return backends

    def _analyze_with_backends(self, text, backend_params):
        """Run the text through every backend of this project and group the
        results by URI.

        backend_params is either None or a dict mapping a backend_id to
        the params passed to that backend's analyze() call; backends
        without an entry get an empty dict. Hits whose score is not
        greater than 0.0 are dropped. Returns a dict mapping each hit URI
        to a list of (score * weight, hit) tuples."""
        if backend_params is None:
            backend_params = {}
        hits_by_uri = collections.defaultdict(list)
        for backend, weight in self.backends:
            beparams = backend_params.get(backend.backend_id, {})
            hits = [hit for hit in backend.analyze(text, params=beparams)
                    if hit.score > 0.0]
            # pass values as logger arguments so that the message is only
            # formatted when debug logging is actually enabled
            logger.debug('Got %s hits from backend %s',
                         len(hits), backend.backend_id)
            for hit in hits:
                hits_by_uri[hit.uri].append((hit.score * weight, hit))
        return hits_by_uri

    @classmethod
    def _merge_hits(cls, hits_by_uri):
        """Combine the per-URI lists of (weighted score, hit) tuples into
        one AnalysisHit per URI.

        The merged score is the sum of the weighted scores; the URI and
        label are taken from the first hit recorded for that URI."""
        merged_hits = []
        for score_hits in hits_by_uri.values():
            total = sum(score for score, _ in score_hits)
            first_hit = score_hits[0][1]
            merged_hits.append(
                annif.hit.AnalysisHit(first_hit.uri, first_hit.label, total))
        return merged_hits

    @classmethod
    def _filter_hits(cls, hits, limit, threshold):
        """Return at most limit hits, ordered by decreasing score, keeping
        only those whose score is at least threshold.

        The limit is applied before the threshold. The list passed in is
        left unmodified."""
        # sorted() instead of list.sort() so the caller's list is not
        # reordered as a side effect
        hits = sorted(hits, key=lambda hit: hit.score, reverse=True)[:limit]
        logger.debug('%s hits after applying limit %s', len(hits), limit)
        hits = [hit for hit in hits if hit.score >= threshold]
        logger.debug('%s hits after applying threshold %s',
                     len(hits), threshold)
        return hits

    def analyze(self, text, limit=10, threshold=0.0, backend_params=None):
        """Analyze the given text by passing it to backends and joining the
        results. Returns a list of AnalysisHit objects ordered by decreasing
        score. The limit parameter defines the maximum number of hits to
        return. Only hits whose score is at least the threshold are
        returned. The optional backend_params dict maps a backend_id to
        the parameters passed to that backend."""

        logger.debug('Analyzing text "%s..." (len=%s)', text[:20], len(text))
        hits_by_uri = self._analyze_with_backends(text, backend_params)
        merged_hits = self._merge_hits(hits_by_uri)
        logger.debug('%s hits after merging', len(merged_hits))
        return self._filter_hits(merged_hits, limit, threshold)

    def load_subjects(self, subjects):
        """Pass the given subjects to the load_subjects() method of every
        backend of this project."""
        for backend, _ in self.backends:
            logger.debug('Loading subjects for backend %s',
                         backend.backend_id)
            backend.load_subjects(subjects)

    def dump(self):
        """return this project as a dict"""
        return {
            'project_id': self.project_id,
            'language': self.language,
            'backends': [{'backend_id': backend.backend_id, 'weight': weight}
                         for backend, weight in self.backends],
        }
0 ignored issues
show
Coding Style introduced by
Wrong continued indentation (remove 1 space).
Loading history...
99
100
101
def get_projects():
    """return the available projects as a dict of project_id -> AnnifProject

    The projects are read from the configuration file named by the
    PROJECTS_FILE setting of the current Flask application; every section
    of that file becomes one AnnifProject keyed by the section name."""
    path = current_app.config['PROJECTS_FILE']
    parser = configparser.ConfigParser()
    with open(path) as handle:
        parser.read_file(handle)

    # one AnnifProject per configuration section, in file order
    return {section: AnnifProject(section, parser[section])
            for section in parser.sections()}
113
114
115
def get_project(project_id):
    """return the definition of a single Project by project_id

    Raises ValueError if no project with the given id is available."""
    projects = get_projects()
    try:
        return projects[project_id]
    except KeyError:
        # "from None" keeps the internal KeyError out of the traceback;
        # callers only need to know that the project does not exist
        raise ValueError("No such project {}".format(project_id)) from None
122