annif.suggestion - Code Metrics - Inspection of "Merge pull request #270 from NatLibFi/issue267-cli..." - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( c8c370...dee89b )

by Osma

created 2019-04-17 09:24 UTC

annif.suggestion A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	187
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	39
eloc	129
dl	0
loc	187
rs	9.28
c	0
b	0
f	0

27 Methods

Rating	Name	Size	Complexity
A	SuggestionFilter.__init__()	3	1
A	SuggestionResult.__getitem__()	2	1
A	ListSuggestionResult.vector()	5	2
A	ListSuggestionResult.filter()	8	3
A	ListSuggestionResult.__len__()	2	1
A	VectorSuggestionResult.__len__()	2	1
A	LazySuggestionResult.hits()	4	1
A	SuggestionResult.vector()	6	1
A	LazySuggestionResult._initialize()	3	2
A	VectorSuggestionResult.subject_order()	5	2
A	ListSuggestionResult.hits()	3	1
A	VectorSuggestionResult.vector()	3	1
A	SuggestionResult.filter()	5	1
A	ListSuggestionResult.__init__()	4	1
A	LazySuggestionResult.__init__()	5	1
A	VectorSuggestionResult.hits()	5	2
A	LazySuggestionResult.filter()	3	1
A	LazySuggestionResult.__len__()	3	1
A	SuggestionResult.__len__()	4	1
A	VectorSuggestionResult.filter()	8	2
A	LazySuggestionResult.__getitem__()	3	1
A	VectorSuggestionResult.__init__()	5	1
A	LazySuggestionResult.vector()	4	1
A	SuggestionResult.hits()	6	1
A	SuggestionFilter.__call__()	4	2
A	VectorSuggestionResult._vector_to_hits()	13	3
A	ListSuggestionResult._hits_to_vector()	7	3

"""Representing suggested subjects."""

import abc
import collections
import numpy as np


# Immutable record for one suggested subject: its URI, its label and the
# score assigned to it.
SubjectSuggestion = collections.namedtuple(
    'SubjectSuggestion', ['uri', 'label', 'score'])

# Immutable two-field record with ``hits`` and ``weight``.
# NOTE(review): not used in this part of the file; presumably pairs a result
# with a weighting factor -- confirm against callers.
WeightedSuggestion = collections.namedtuple(
    'WeightedSuggestion', ['hits', 'weight'])


class SuggestionFilter:
    """Callable that applies a fixed limit and score threshold to results.

    The limit and threshold are stored when the filter is created. Calling
    the filter on a result object does not filter immediately: the call to
    the result's ``filter`` method is wrapped in a LazySuggestionResult and
    only happens when that wrapper is first used."""

    def __init__(self, limit=None, threshold=0.0):
        """Remember the limit and threshold to pass on to ``filter``."""
        self._threshold = threshold
        self._limit = limit

    def __call__(self, orighits):
        """Return a lazily evaluated, filtered view of orighits."""

        def apply_filter():
            # Reads the stored settings at evaluation time, not at wrap time.
            return orighits.filter(self._limit, self._threshold)

        return LazySuggestionResult(apply_filter)


class SuggestionResult(metaclass=abc.ABCMeta):
    """Common interface for the set of hits produced by an analysis
    operation.

    Concrete subclasses must supply ``hits``, ``vector``, ``filter`` and
    ``__len__``. Indexing is provided here and is delegated to ``hits``."""

    @property
    @abc.abstractmethod
    def hits(self):
        """The hits as an ordered sequence of SubjectSuggestion objects,
        with the highest scores first."""
        pass  # pragma: no cover

    @property
    @abc.abstractmethod
    def vector(self):
        """The hits as a one-dimensional vector of scores whose positions
        correspond to the subject index in use."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def filter(self, limit=None, threshold=0.0):
        """Return another SuggestionResult holding only the hits that
        survive the given limit and score threshold."""
        pass  # pragma: no cover

    @abc.abstractmethod
    def __len__(self):
        """Return how many hits have a non-zero score."""
        pass  # pragma: no cover

    def __getitem__(self, idx):
        """Return the hit (or slice of hits) at idx, as found in ``hits``."""
        ordered_hits = self.hits
        return ordered_hits[idx]


class LazySuggestionResult(SuggestionResult):
    """Proxy SuggestionResult that builds the real result on first access.

    The wrapped SuggestionResult is created by calling the supplied factory
    the first time any member is used; every member then forwards to that
    wrapped object."""

    def __init__(self, construct):
        """Store the factory. construct is a zero-argument callable that
        returns the actual SuggestionResult; it is not called here."""
        self._object = None
        self._construct = construct

    def _initialize(self):
        """Create the wrapped object unless one has already been created."""
        if self._object is not None:
            return
        self._object = self._construct()

    @property
    def hits(self):
        """The ``hits`` of the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped.hits

    @property
    def vector(self):
        """The ``vector`` of the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped.vector

    def filter(self, limit=None, threshold=0.0):
        """Forward the filter request to the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped.filter(limit, threshold)

    def __len__(self):
        """Return the length reported by the wrapped result."""
        self._initialize()
        wrapped = self._object
        return len(wrapped)

    def __getitem__(self, idx):
        """Index into the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped[idx]


class VectorSuggestionResult(SuggestionResult):
    """SuggestionResult implementation based primarily on NumPy vectors.

    Scores are held as a vector indexed by subject id. The descending score
    order and the hit list are computed on first use and then cached."""

    def __init__(self, vector, subject_index):
        """Wrap a score vector. vector holds one score per subject id;
        subject_index is indexed by subject id and each entry is read as
        (uri, label) at positions 0 and 1."""
        self._vector = vector
        self._subject_index = subject_index
        self._subject_order = None  # cached by the subject_order property
        self._hits = None  # cached by the hits property

    def _vector_to_hits(self):
        """Convert the positive scores into a ListSuggestionResult, highest
        score first."""
        hits = []
        for subject_id in self.subject_order:
            score = self._vector[subject_id]
            if score <= 0.0:
                # subject_order is descending, so every remaining score is
                # also non-positive and the scan can stop here.
                break
            subject = self._subject_index[subject_id]
            hits.append(
                SubjectSuggestion(
                    uri=subject[0],
                    label=subject[1],
                    score=score))
        return ListSuggestionResult(hits, self._subject_index)

    @property
    def subject_order(self):
        """Subject ids ordered by descending score (computed once)."""
        if self._subject_order is None:
            self._subject_order = np.argsort(self._vector)[::-1]
        return self._subject_order

    @property
    def hits(self):
        """The positive-score hits, highest first (computed once).

        Note that the value is the ListSuggestionResult built by
        _vector_to_hits, not a plain list."""
        if self._hits is None:
            self._hits = self._vector_to_hits()
        return self._hits

    @property
    def vector(self):
        """The score vector this result was created with."""
        return self._vector

    def filter(self, limit=None, threshold=0.0):
        """Return a new VectorSuggestionResult in which every score that is
        not strictly greater than threshold, or that is outside the limit
        highest-scoring subjects (when limit is given), is set to zero.

        NOTE(review): the comparison is strict here, whereas
        ListSuggestionResult.filter keeps scores equal to the threshold."""
        mask = (self._vector > threshold)
        if limit is not None:
            # Use the builtin bool: the np.bool alias was deprecated in
            # NumPy 1.20 and removed in 1.24.
            limit_mask = np.zeros(len(self._vector), dtype=bool)
            top_k_subjects = self.subject_order[:limit]
            limit_mask[top_k_subjects] = True
            mask = mask & limit_mask
        return VectorSuggestionResult(self._vector * mask, self._subject_index)

    def __len__(self):
        """Return the number of scores greater than zero."""
        return (self._vector > 0.0).sum()


class ListSuggestionResult(SuggestionResult):
    """SuggestionResult backed primarily by a list of hits.

    Hits whose score is not greater than zero are dropped on construction.
    The score vector is derived from the list the first time it is
    requested and then cached."""

    def __init__(self, hits, subject_index):
        """Keep the positive-score hits and remember the subject index."""
        positive = []
        for candidate in hits:
            if candidate.score > 0.0:
                positive.append(candidate)
        self._hits = positive
        self._subject_index = subject_index
        self._vector = None

    def _hits_to_vector(self):
        """Build a score vector with one slot per entry of the subject
        index, filled from the stored hits."""
        scores = np.zeros(len(self._subject_index))
        for suggestion in self._hits:
            position = self._subject_index.by_uri(suggestion.uri)
            if position is None:
                # by_uri gave no id (presumably an unknown URI); the hit
                # contributes nothing to the vector.
                continue
            scores[position] = suggestion.score
        return scores

    @property
    def hits(self):
        """The stored list of hits."""
        return self._hits

    @property
    def vector(self):
        """The score vector for the stored hits (computed once)."""
        if self._vector is None:
            self._vector = self._hits_to_vector()
        return self._vector

    def filter(self, limit=None, threshold=0.0):
        """Return a new ListSuggestionResult with the hits sorted by
        descending score, cut to at most limit entries (when limit is
        given), and restricted to positive scores that are at least
        threshold."""
        ranked = list(self.hits)
        ranked.sort(key=lambda hit: hit.score, reverse=True)
        if limit is not None:
            ranked = ranked[:limit]
        kept = []
        for hit in ranked:
            if hit.score >= threshold and hit.score > 0.0:
                kept.append(hit)
        return ListSuggestionResult(kept, self._subject_index)

    def __len__(self):
        """Return the number of stored hits."""
        return len(self._hits)


1			"""Representing suggested subjects."""
2
3			import abc
4			import collections
5			import numpy as np
6
7
8			SubjectSuggestion = collections.namedtuple(
9			'SubjectSuggestion', 'uri label score')
10			WeightedSuggestion = collections.namedtuple(
11			'WeightedSuggestion', 'hits weight')
12
13
14			class SuggestionFilter:
15			"""A reusable filter for filtering SubjectSuggestion objects."""
16
17			def __init__(self, limit=None, threshold=0.0):
18			self._limit = limit
19			self._threshold = threshold
20
21			def __call__(self, orighits):
22			return LazySuggestionResult(
23			lambda: orighits.filter(
24			self._limit, self._threshold))
25
26
27			class SuggestionResult(metaclass=abc.ABCMeta):
28			"""Abstract base class for a set of hits returned by an analysis
29			operation."""
30
31			@property
32			@abc.abstractmethod
33			def hits(self):
34			"""Return the hits as an ordered sequence of SubjectSuggestion objects,
35			highest scores first."""
36			pass # pragma: no cover
37
38			@property
39			@abc.abstractmethod
40			def vector(self):
41			"""Return the hits as a one-dimensional score vector
42			where the indexes match the given subject index."""
43			pass # pragma: no cover
44
45			@abc.abstractmethod
46			def filter(self, limit=None, threshold=0.0):
47			"""Return a subset of the hits, filtered by the given limit and
48			score threshold, as another SuggestionResult object."""
49			pass # pragma: no cover
50
51			@abc.abstractmethod
52			def __len__(self):
53			"""Return the number of hits with non-zero scores."""
54			pass # pragma: no cover
55
56			def __getitem__(self, idx):
57			return self.hits[idx]
58
59
60			class LazySuggestionResult(SuggestionResult):
61			"""SuggestionResult implementation that wraps another SuggestionResult which
62			is initialized lazily only when it is actually accessed. Method calls
63			will be proxied to the wrapped SuggestionResult."""
64
65			def __init__(self, construct):
66			"""Create the proxy object. The given construct function will be
67			called to create the actual SuggestionResult when it is needed."""
68			self._construct = construct
69			self._object = None
70
71			def _initialize(self):
72			if self._object is None:
73			self._object = self._construct()
74
75			@property
76			def hits(self):
77			self._initialize()
78			return self._object.hits
79
80			@property
81			def vector(self):
82			self._initialize()
83			return self._object.vector
84
85			def filter(self, limit=None, threshold=0.0):
86			self._initialize()
87			return self._object.filter(limit, threshold)
88
89			def __len__(self):
90			self._initialize()
91			return len(self._object)
92
93			def __getitem__(self, idx):
94			self._initialize()
95			return self._object[idx]
96
97
98			class VectorSuggestionResult(SuggestionResult):
99			"""SuggestionResult implementation based primarily on NumPy vectors."""
100
101			def __init__(self, vector, subject_index):
102			self._vector = vector
103			self._subject_index = subject_index
104			self._subject_order = None
105			self._hits = None
106
107			def _vector_to_hits(self):
108			hits = []
109			for subject_id in self.subject_order:
110			score = self._vector[subject_id]
111			if score <= 0.0:
112			continue # we can skip the remaining ones
113			subject = self._subject_index[subject_id]
114			hits.append(
115			SubjectSuggestion(
116			uri=subject[0],
117			label=subject[1],
118			score=score))
119			return ListSuggestionResult(hits, self._subject_index)
120
121			@property
122			def subject_order(self):
123			if self._subject_order is None:
124			self._subject_order = np.argsort(self._vector)[::-1]
125			return self._subject_order
126
127			@property
128			def hits(self):
129			if self._hits is None:
130			self._hits = self._vector_to_hits()
131			return self._hits
132
133			@property
134			def vector(self):
135			return self._vector
136
137			def filter(self, limit=None, threshold=0.0):
138			mask = (self._vector > threshold)
139			if limit is not None:
140			limit_mask = np.zeros(len(self._vector), dtype=np.bool)
141			top_k_subjects = self.subject_order[:limit]
142			limit_mask[top_k_subjects] = True
143			mask = mask & limit_mask
144			return VectorSuggestionResult(self._vector * mask, self._subject_index)
145
146			def __len__(self):
147			return (self._vector > 0.0).sum()
148
149
150			class ListSuggestionResult(SuggestionResult):
151			"""SuggestionResult implementation based primarily on lists of hits."""
152
153			def __init__(self, hits, subject_index):
154			self._hits = [hit for hit in hits if hit.score > 0.0]
155			self._subject_index = subject_index
156			self._vector = None
157
158			def _hits_to_vector(self):
159			vector = np.zeros(len(self._subject_index))
160			for hit in self._hits:
161			subject_id = self._subject_index.by_uri(hit.uri)
162			if subject_id is not None:
163			vector[subject_id] = hit.score
164			return vector
165
166			@property
167			def hits(self):
168			return self._hits
169
170			@property
171			def vector(self):
172			if self._vector is None:
173			self._vector = self._hits_to_vector()
174			return self._vector
175
176			def filter(self, limit=None, threshold=0.0):
177			hits = sorted(self.hits, key=lambda hit: hit.score, reverse=True)
178			if limit is not None:
179			hits = hits[:limit]
180			return ListSuggestionResult([hit for hit in hits
181			if hit.score >= threshold and
182			hit.score > 0.0],
183			self._subject_index)
184
185			def __len__(self):
186			return len(self._hits)
187

NatLibFi / Annif

Push — master ( c8c370...dee89b )

annif.suggestion A

Complexity

Size/Duplication

Importance

27 Methods

Duplication Side-by-Side

Filter issues like