Passed
Pull Request — master (#604)
by Osma
07:27 queued 11s
created

annif.suggestion.LazySuggestionResult.__init__()   A

Complexity

Conditions 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 5
Ratio 100 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 2
dl 5
loc 5
rs 10
c 0
b 0
f 0
1
"""Representing suggested subjects."""
2
3
import abc
4
import collections
5
import itertools
6
import numpy as np
7
8
9
# One suggested subject: `subject_id` is used elsewhere in this module as an
# index into score vectors (and may be None), `score` is its numeric score.
SubjectSuggestion = collections.namedtuple(
    'SubjectSuggestion', 'subject_id score')
# Record with the fields `hits`, `weight` and `subjects`.
# NOTE(review): not used within this module; presumably pairs a set of hits
# with a weight for combining results -- confirm against the callers.
WeightedSuggestion = collections.namedtuple(
    'WeightedSuggestion', 'hits weight subjects')
13
14
15
class SuggestionFilter:
    """A reusable filter for filtering SubjectSuggestion objects.

    An instance stores the filtering parameters once and can then be called
    on any number of suggestion results.
    """

    def __init__(self, subject_index, limit=None, threshold=0.0):
        """Remember the parameters to pass on to the results' filter().

        subject_index, limit and threshold are stored as given and handed
        unchanged to the filter() method of each result this filter is
        applied to.
        """
        self._subject_index = subject_index
        self._limit = limit
        self._threshold = threshold

    def __call__(self, orighits):
        """Return a lazily evaluated, filtered view of orighits.

        The actual orighits.filter() call is deferred until the returned
        LazySuggestionResult is first accessed.
        """
        return LazySuggestionResult(
            lambda: orighits.filter(self._subject_index,
                                    self._limit,
                                    self._threshold))
28
29
30 View Code Duplication
class SuggestionResult(metaclass=abc.ABCMeta):
    """Abstract base class for a set of hits returned by an analysis
    operation.

    Concrete subclasses must implement every method below; the class itself
    cannot be instantiated.
    """

    @abc.abstractmethod
    def as_list(self):
        """Return the hits as an ordered sequence of SubjectSuggestion objects,
        highest scores first."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def as_vector(self, size, destination=None):
        """Return the hits as a one-dimensional score vector of given size.
        If destination array is given (not None) it will be used, otherwise a
        new array will be created."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def filter(self, subject_index, limit=None, threshold=0.0):
        """Return a subset of the hits, filtered by the given limit and
        score threshold, as another SuggestionResult object."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def __len__(self):
        """Return the number of hits with non-zero scores."""
        pass  # pragma: no cover
57
58
59 View Code Duplication
class LazySuggestionResult(SuggestionResult):
    """SuggestionResult implementation that wraps another SuggestionResult which
    is initialized lazily only when it is actually accessed. Method calls
    will be proxied to the wrapped SuggestionResult."""

    def __init__(self, construct):
        """Create the proxy object. The given construct function will be
        called to create the actual SuggestionResult when it is needed."""
        self._construct = construct
        self._object = None

    def _initialize(self):
        """Build the wrapped result on first use and keep it for later calls."""
        if self._object is None:
            self._object = self._construct()

    def as_list(self):
        """Return the wrapped result's as_list(), constructing it if needed."""
        self._initialize()
        return self._object.as_list()

    def as_vector(self, size, destination=None):
        """Return the wrapped result's as_vector(), constructing it if
        needed."""
        self._initialize()
        return self._object.as_vector(size, destination)

    def filter(self, subject_index, limit=None, threshold=0.0):
        """Return the wrapped result's filter() output, constructing the
        wrapped result if needed."""
        self._initialize()
        return self._object.filter(subject_index, limit, threshold)

    def __len__(self):
        """Return the length of the wrapped result, constructing it if
        needed."""
        self._initialize()
        return len(self._object)
89
90
91 View Code Duplication
class VectorSuggestionResult(SuggestionResult):
    """SuggestionResult implementation based primarily on NumPy vectors.

    Scores are held in a float32 vector indexed by subject ID. The sort
    order and the list representation are computed on demand and cached.
    """

    def __init__(self, vector):
        """Store the scores from vector as float32, clamped to 0.0 .. 1.0."""
        vector_f32 = vector.astype(np.float32)
        # limit scores to the range 0.0 .. 1.0
        self._vector = np.minimum(np.maximum(vector_f32, 0.0), 1.0)
        self._subject_order = None
        self._lsr = None

    def _vector_to_list_suggestion(self):
        """Build a ListSuggestionResult holding the positive-score subjects,
        highest scores first."""
        hits = []
        for subject_id in self.subject_order:
            score = self._vector[subject_id]
            if score <= 0.0:
                # subject_order is descending, so nothing positive follows
                break  # we can skip the remaining ones
            hits.append(
                SubjectSuggestion(
                    subject_id=subject_id,
                    score=float(score)))
        return ListSuggestionResult(hits)

    @property
    def subject_order(self):
        """Subject IDs ordered by descending score (computed once, then
        cached)."""
        if self._subject_order is None:
            self._subject_order = np.argsort(self._vector)[::-1]
        return self._subject_order

    def as_list(self):
        """Return the hits as a list of SubjectSuggestion objects, highest
        scores first."""
        if self._lsr is None:
            self._lsr = self._vector_to_list_suggestion()
        return self._lsr.as_list()

    def as_vector(self, size, destination=None):
        """Return the score vector.

        If destination is given, the scores are copied into it and it is
        returned; otherwise the internal vector itself is returned. The size
        argument is not used by this implementation.
        """
        if destination is not None:
            np.copyto(destination, self._vector)
            return destination
        return self._vector

    def filter(self, subject_index, limit=None, threshold=0.0):
        """Return a ListSuggestionResult with the hits that survive
        filtering.

        Subjects listed by subject_index.deprecated_ids() are excluded. If
        limit is given, only the limit highest-ranked non-deprecated
        subjects are kept. Only scores strictly greater than threshold are
        kept.
        """
        # NOTE(review): this comparison is strict (>), whereas
        # ListSuggestionResult.filter keeps scores >= threshold; the two
        # differ for scores exactly equal to a non-zero threshold -- confirm
        # which is intended.
        mask = (self._vector > threshold)
        deprecated_ids = subject_index.deprecated_ids()
        if limit is not None:
            limit_mask = np.zeros_like(self._vector, dtype=bool)
            deprecated_set = set(deprecated_ids)
            # walk subjects best-first, skipping deprecated ones, and keep
            # the first `limit` of them
            top_k_subjects = itertools.islice(
                (subj for subj in self.subject_order
                 if subj not in deprecated_set), limit)
            limit_mask[list(top_k_subjects)] = True
            mask = mask & limit_mask
        else:
            deprecated_mask = np.ones_like(self._vector, dtype=bool)
            deprecated_mask[deprecated_ids] = False
            mask = mask & deprecated_mask
        # masked-out entries become 0.0 and are dropped by as_list()
        vsr = VectorSuggestionResult(self._vector * mask)
        return ListSuggestionResult(vsr.as_list())

    def __len__(self):
        """Return the number of subjects with a score above zero."""
        # convert the NumPy integer scalar to a plain Python int
        return int((self._vector > 0.0).sum())
150
151
152 View Code Duplication
class ListSuggestionResult(SuggestionResult):
    """SuggestionResult implementation based primarily on lists of hits."""

    def __init__(self, hits):
        """Store the hits that have a positive score, capping scores at 1.0.

        The order of the given hits is preserved.
        """
        self._list = [self._enforce_score_range(hit)
                      for hit in hits
                      if hit.score > 0.0]
        # cache for the vector built by as_vector() without a destination
        self._vector = None

    @staticmethod
    def _enforce_score_range(hit):
        """Return hit unchanged, or a copy with score 1.0 if its score
        exceeds 1.0."""
        if hit.score > 1.0:
            return hit._replace(score=1.0)
        return hit

    def _list_to_vector(self, size, destination):
        """Write the hit scores into a vector at their subject IDs.

        If destination is None a new zero-filled float32 vector of the given
        size is created; otherwise destination is written to in place (only
        the positions of the hits are touched). Hits whose subject_id is
        None are skipped.
        """
        if destination is None:
            destination = np.zeros(size, dtype=np.float32)

        for hit in self._list:
            if hit.subject_id is not None:
                destination[hit.subject_id] = hit.score
        return destination

    def as_list(self):
        """Return the stored list of hits."""
        return self._list

    def as_vector(self, size, destination=None):
        """Return the hits as a one-dimensional score vector of given size.

        If destination is given, the scores are written into it and it is
        returned. Otherwise a vector owned by this object is returned; it is
        built on first use and reused by later calls with the same size.
        """
        if destination is not None:
            # Always fill the caller's array, and never cache it: the caller
            # owns it and may overwrite it later.
            return self._list_to_vector(size, destination)
        if self._vector is None or self._vector.shape != (size,):
            # (re)build the cached vector if missing or of a different size
            self._vector = self._list_to_vector(size, None)
        return self._vector

    def filter(self, subject_index, limit=None, threshold=0.0):
        """Return a new ListSuggestionResult with the filtered hits.

        Hits are sorted by descending score; those with a score below
        threshold, a non-positive score, or a subject_id of None are
        dropped, and at most limit hits are kept if limit is given.
        """
        # NOTE(review): subject_index is not consulted here, so deprecated
        # subjects are not removed by this implementation -- confirm that
        # this is intended.
        hits = sorted(self._list, key=lambda hit: hit.score, reverse=True)
        filtered_hits = [hit for hit in hits
                         if hit.score >= threshold and hit.score > 0.0 and
                         hit.subject_id is not None]
        if limit is not None:
            filtered_hits = filtered_hits[:limit]
        return ListSuggestionResult(filtered_hits)

    def __len__(self):
        """Return the number of stored hits."""
        return len(self._list)
195