Passed
Push — master ( c8c370...dee89b )
by Osma
03:14
created

annif.suggestion   A

Complexity

Total Complexity 39

Size/Duplication

Total Lines 187
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 39
eloc 129
dl 0
loc 187
rs 9.28
c 0
b 0
f 0

27 Methods

Rating   Name   Duplication   Size   Complexity  
A SuggestionFilter.__init__() 0 3 1
A SuggestionResult.__getitem__() 0 2 1
A ListSuggestionResult.vector() 0 5 2
A ListSuggestionResult.filter() 0 8 3
A ListSuggestionResult.__len__() 0 2 1
A VectorSuggestionResult.__len__() 0 2 1
A LazySuggestionResult.hits() 0 4 1
A SuggestionResult.vector() 0 6 1
A LazySuggestionResult._initialize() 0 3 2
A VectorSuggestionResult.subject_order() 0 5 2
A ListSuggestionResult.hits() 0 3 1
A VectorSuggestionResult.vector() 0 3 1
A SuggestionResult.filter() 0 5 1
A ListSuggestionResult.__init__() 0 4 1
A LazySuggestionResult.__init__() 0 5 1
A VectorSuggestionResult.hits() 0 5 2
A LazySuggestionResult.filter() 0 3 1
A LazySuggestionResult.__len__() 0 3 1
A SuggestionResult.__len__() 0 4 1
A VectorSuggestionResult.filter() 0 8 2
A LazySuggestionResult.__getitem__() 0 3 1
A VectorSuggestionResult.__init__() 0 5 1
A LazySuggestionResult.vector() 0 4 1
A SuggestionResult.hits() 0 6 1
A SuggestionFilter.__call__() 0 4 2
A VectorSuggestionResult._vector_to_hits() 0 13 3
A ListSuggestionResult._hits_to_vector() 0 7 3
1
"""Representing suggested subjects."""
2
3
import abc
4
import collections
5
import numpy as np
6
7
8
# One suggested subject: its URI, its label and the score assigned to it.
SubjectSuggestion = collections.namedtuple(
    'SubjectSuggestion', 'uri label score')
# Pairs a `hits` value with a `weight` value.
# NOTE(review): presumably `hits` is a SuggestionResult and `weight` a
# number; neither is used in this module, so confirm against the callers.
WeightedSuggestion = collections.namedtuple(
    'WeightedSuggestion', 'hits weight')
12
13
14
class SuggestionFilter:
    """A reusable filter for filtering SubjectSuggestion objects.

    The filter remembers a limit and a score threshold and applies them to
    any object that provides a filter(limit, threshold) method."""

    def __init__(self, limit=None, threshold=0.0):
        """Store the parameters that will be handed to filter().

        limit -- passed as the first argument of filter() (default None)
        threshold -- passed as the second argument of filter()
            (default 0.0)"""
        self._limit = limit
        self._threshold = threshold

    def __call__(self, orighits):
        """Return a LazySuggestionResult wrapping the filtered orighits.

        The filtering is deferred: orighits.filter() is not called here but
        only when the returned LazySuggestionResult invokes its construct
        function."""
        return LazySuggestionResult(
            lambda: orighits.filter(
                self._limit, self._threshold))
25
26
27
class SuggestionResult(metaclass=abc.ABCMeta):
    """Abstract base class for a set of hits returned by an analysis
    operation.

    Concrete subclasses must provide the hits and vector properties as well
    as filter() and __len__(); indexing is implemented here on top of
    hits."""

    @property
    @abc.abstractmethod
    def hits(self):
        """Return the hits as an ordered sequence of SubjectSuggestion objects,
        highest scores first."""
        pass  # pragma: no cover

    @property
    @abc.abstractmethod
    def vector(self):
        """Return the hits as a one-dimensional score vector
        where the indexes match the given subject index."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def filter(self, limit=None, threshold=0.0):
        """Return a subset of the hits, filtered by the given limit and
        score threshold, as another SuggestionResult object."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def __len__(self):
        """Return the number of hits with non-zero scores."""
        pass  # pragma: no cover

    def __getitem__(self, idx):
        """Return self.hits[idx]; idx is forwarded unchanged, so whatever
        indexing the hits sequence supports is supported here too."""
        return self.hits[idx]
58
59
60
class LazySuggestionResult(SuggestionResult):
    """SuggestionResult implementation that wraps another SuggestionResult which
    is initialized lazily only when it is actually accessed. Method calls
    will be proxied to the wrapped SuggestionResult."""

    def __init__(self, construct):
        """Create the proxy object. The given construct function will be
        called to create the actual SuggestionResult when it is needed."""
        self._construct = construct
        # The wrapped result; stays None until the first access.
        self._object = None

    def _initialize(self):
        """Create the wrapped object by calling the construct function,
        unless a (non-None) object has already been created."""
        if self._object is None:
            self._object = self._construct()

    @property
    def hits(self):
        """Return the hits of the wrapped result, creating it if needed."""
        self._initialize()
        return self._object.hits

    @property
    def vector(self):
        """Return the vector of the wrapped result, creating it if
        needed."""
        self._initialize()
        return self._object.vector

    def filter(self, limit=None, threshold=0.0):
        """Return the wrapped result's filter(limit, threshold), creating
        the wrapped result first if needed."""
        self._initialize()
        return self._object.filter(limit, threshold)

    def __len__(self):
        """Return len() of the wrapped result, creating it if needed."""
        self._initialize()
        return len(self._object)

    def __getitem__(self, idx):
        """Return the wrapped result indexed by idx, creating it if
        needed."""
        self._initialize()
        return self._object[idx]
96
97
98
class VectorSuggestionResult(SuggestionResult):
    """SuggestionResult implementation based primarily on NumPy vectors."""

    def __init__(self, vector, subject_index):
        """Wrap a score vector.

        vector -- score vector indexed by subject ID; it is used with
            NumPy operations (argsort, comparison, multiplication)
        subject_index -- object indexable by subject ID; element [0] of
            each entry is used as the URI and element [1] as the label"""
        self._vector = vector
        self._subject_index = subject_index
        self._subject_order = None  # cache for the subject_order property
        self._hits = None  # cache for the hits property

    def _vector_to_hits(self):
        """Convert the positive scores of the vector into a
        ListSuggestionResult, highest scores first."""
        hits = []
        for subject_id in self.subject_order:
            score = self._vector[subject_id]
            if score <= 0.0:
                # subject_order is sorted by descending score, so all the
                # remaining subjects have non-positive scores as well
                break  # we can skip the remaining ones
            subject = self._subject_index[subject_id]
            hits.append(
                SubjectSuggestion(
                    uri=subject[0],
                    label=subject[1],
                    score=score))
        return ListSuggestionResult(hits, self._subject_index)

    @property
    def subject_order(self):
        """Return the subject IDs sorted by descending score. The order is
        computed on first access and cached."""
        if self._subject_order is None:
            self._subject_order = np.argsort(self._vector)[::-1]
        return self._subject_order

    @property
    def hits(self):
        """Return the hits as a ListSuggestionResult built from the vector.
        The conversion is done on first access and cached."""
        if self._hits is None:
            self._hits = self._vector_to_hits()
        return self._hits

    @property
    def vector(self):
        """Return the underlying score vector."""
        return self._vector

    def filter(self, limit=None, threshold=0.0):
        """Return a new VectorSuggestionResult where every score that is
        not strictly greater than threshold, or that is not among the
        `limit` highest-scoring subjects (when limit is given), is set to
        zero."""
        mask = (self._vector > threshold)
        if limit is not None:
            # The builtin bool is used as dtype: the np.bool alias was
            # deprecated in NumPy 1.20 and removed in NumPy 1.24.
            limit_mask = np.zeros(len(self._vector), dtype=bool)
            top_k_subjects = self.subject_order[:limit]
            limit_mask[top_k_subjects] = True
            mask = mask & limit_mask
        return VectorSuggestionResult(self._vector * mask, self._subject_index)

    def __len__(self):
        """Return the number of scores greater than zero as a plain int."""
        return int((self._vector > 0.0).sum())
148
149
150
class ListSuggestionResult(SuggestionResult):
    """SuggestionResult implementation based primarily on lists of hits."""

    def __init__(self, hits, subject_index):
        """Store the hits that have a score greater than zero.

        hits -- iterable of objects with a score attribute; the given order
            is kept
        subject_index -- object supporting len() and by_uri(uri), used
            when the hits are converted into a vector"""
        self._hits = [hit for hit in hits if hit.score > 0.0]
        self._subject_index = subject_index
        self._vector = None  # cache for the vector property

    def _hits_to_vector(self):
        """Build a score vector with one element per entry of the subject
        index; hits for which by_uri() returns None are left out."""
        vector = np.zeros(len(self._subject_index))
        for hit in self._hits:
            subject_id = self._subject_index.by_uri(hit.uri)
            if subject_id is not None:
                vector[subject_id] = hit.score
        return vector

    @property
    def hits(self):
        """Return the stored list of hits."""
        return self._hits

    @property
    def vector(self):
        """Return the hits as a score vector. The conversion is done on
        first access and cached."""
        if self._vector is None:
            self._vector = self._hits_to_vector()
        return self._vector

    def filter(self, limit=None, threshold=0.0):
        """Return a new ListSuggestionResult with the hits sorted by
        descending score, cut to at most `limit` hits (when limit is
        given) and restricted to scores that are at least threshold and
        greater than zero."""
        hits = sorted(self.hits, key=lambda hit: hit.score, reverse=True)
        if limit is not None:
            hits = hits[:limit]
        return ListSuggestionResult([hit for hit in hits
                                     if hit.score >= threshold and
                                     hit.score > 0.0],
                                    self._subject_index)

    def __len__(self):
        """Return the number of stored hits."""
        return len(self._hits)
187