annif.suggestion - Code Metrics - Inspection of "Refactor SubjectSuggestion to store subject_id - n..." - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#604)

by Osma

created 2022-08-10 10:45 UTC

annif.suggestion A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	195
Duplicated Lines	82.05 %

Importance

Changes

Metric	Value
eloc	134
dl	160
loc	195
rs	9.1199
c	0
b	0
f	0
wmc	41

26 Methods

Rating	Name	Duplication	Size	Complexity
A	SuggestionFilter.__init__()	0	4	1
A	SuggestionFilter.__call__()	0	5	2
A	VectorSuggestionResult._vector_to_list_suggestion()	11	11	3
A	SuggestionResult.as_vector()	6	6	1
A	VectorSuggestionResult.as_vector()	5	5	2
A	VectorSuggestionResult.as_list()	4	4	2
A	ListSuggestionResult.filter()	8	8	3
A	ListSuggestionResult.__len__()	2	2	1
A	VectorSuggestionResult.__len__()	2	2	1
A	LazySuggestionResult._initialize()	3	3	2
A	ListSuggestionResult.as_list()	2	2	1
A	VectorSuggestionResult.subject_order()	5	5	2
A	SuggestionResult.filter()	5	5	1
A	ListSuggestionResult.__init__()	5	5	1
A	LazySuggestionResult.as_list()	3	3	1
A	SuggestionResult.as_list()	5	5	1
A	LazySuggestionResult.__init__()	5	5	1
A	LazySuggestionResult.filter()	3	3	1
A	LazySuggestionResult.__len__()	3	3	1
A	SuggestionResult.__len__()	4	4	1
A	VectorSuggestionResult.filter()	17	17	2
A	ListSuggestionResult.as_vector()	4	4	2
A	ListSuggestionResult._list_to_vector()	8	8	4
A	VectorSuggestionResult.__init__()	6	6	1
A	ListSuggestionResult._enforce_score_range()	5	5	2
A	LazySuggestionResult.as_vector()	3	3	1

How to fix Duplicated Code Complexity

"""Representing suggested subjects."""

import abc
import collections
import itertools
import numpy as np


# Immutable record pairing a subject identifier with its score.
SubjectSuggestion = collections.namedtuple(
    'SubjectSuggestion', ['subject_id', 'score'])
# Immutable record with the fields hits, weight and subjects.
WeightedSuggestion = collections.namedtuple(
    'WeightedSuggestion', ['hits', 'weight', 'subjects'])


class SuggestionFilter:
    """Callable that applies a fixed limit and threshold to suggestions.

    The subject index, limit and threshold are captured once at
    construction time and reused on every call."""

    def __init__(self, subject_index, limit=None, threshold=0.0):
        """Remember the arguments that are later handed to filter()."""
        self._subject_index, self._limit, self._threshold = (
            subject_index, limit, threshold)

    def __call__(self, orighits):
        """Wrap the filtering of orighits in a LazySuggestionResult.

        orighits.filter() is not invoked here; the returned object
        receives a zero-argument function that performs the call."""

        def apply_filter():
            return orighits.filter(
                self._subject_index, self._limit, self._threshold)

        return LazySuggestionResult(apply_filter)


class SuggestionResult(metaclass=abc.ABCMeta):
    """Interface shared by every container of hits produced by an
    analysis operation. All methods declared here are abstract."""

    @abc.abstractmethod
    def as_list(self):
        """Give the hits as an ordered sequence of SubjectSuggestion
        objects, with the highest-scoring hit first."""

    @abc.abstractmethod
    def as_vector(self, size, destination=None):
        """Give the hits as a one-dimensional score vector of the given
        size. When a destination array is supplied (not None) it is used;
        otherwise a new array is created."""

    @abc.abstractmethod
    def filter(self, subject_index, limit=None, threshold=0.0):
        """Give a subset of the hits, restricted by the given limit and
        score threshold, as another SuggestionResult object."""

    @abc.abstractmethod
    def __len__(self):
        """Give the number of hits whose score is non-zero."""


class LazySuggestionResult(SuggestionResult):
    """Proxy that postpones building a SuggestionResult until it is used.

    The wrapped object is created by calling the supplied function the
    first time any method is invoked; every method call is then forwarded
    to that wrapped object."""

    def __init__(self, construct):
        """Set up the proxy. construct is a zero-argument function that
        will be called to produce the real SuggestionResult on demand."""
        self._object = None
        self._construct = construct

    def _initialize(self):
        """Build the wrapped result unless one is already held."""
        if self._object is not None:
            return
        self._object = self._construct()

    def as_list(self):
        """Forward to as_list() of the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped.as_list()

    def as_vector(self, size, destination=None):
        """Forward to as_vector() of the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped.as_vector(size, destination)

    def filter(self, subject_index, limit=None, threshold=0.0):
        """Forward to filter() of the wrapped result."""
        self._initialize()
        wrapped = self._object
        return wrapped.filter(subject_index, limit, threshold)

    def __len__(self):
        """Forward to len() of the wrapped result."""
        self._initialize()
        wrapped = self._object
        return len(wrapped)


class VectorSuggestionResult(SuggestionResult):

    """SuggestionResult implementation based primarily on NumPy vectors.

    The score vector is indexed by subject ID: element i holds the score
    of the subject whose ID is i."""

    def __init__(self, vector):
        """Store the scores as float32, limited to the range 0.0 .. 1.0."""
        self._vector = np.clip(vector.astype(np.float32), 0.0, 1.0)
        # both are computed lazily and cached on first use
        self._subject_order = None
        self._lsr = None

    def _vector_to_list_suggestion(self):
        """Build a ListSuggestionResult holding the hits with a positive
        score, highest scores first."""
        hits = []
        for subject_id in self.subject_order:
            score = self._vector[subject_id]
            if score <= 0.0:
                break  # we can skip the remaining ones
            # store plain Python values rather than NumPy scalars
            hits.append(
                SubjectSuggestion(
                    subject_id=int(subject_id),
                    score=float(score)))
        return ListSuggestionResult(hits)

    @property
    def subject_order(self):
        """Subject IDs sorted by descending score (computed once)."""
        if self._subject_order is None:
            self._subject_order = np.argsort(self._vector)[::-1]
        return self._subject_order

    def as_list(self):
        """Return the hits as a list of SubjectSuggestion objects,
        highest scores first."""
        if self._lsr is None:
            self._lsr = self._vector_to_list_suggestion()
        return self._lsr.as_list()

    def as_vector(self, size, destination=None):
        """Return the hits as a score vector.

        If destination is given, the scores are copied into it and it is
        returned; otherwise the internal vector itself is returned (not a
        copy). The size argument is not used by this implementation."""
        if destination is not None:
            np.copyto(destination, self._vector)
            return destination
        return self._vector

    def filter(self, subject_index, limit=None, threshold=0.0):
        """Return a ListSuggestionResult with the hits that pass the filter.

        Subjects listed by subject_index.deprecated_ids() are excluded. If
        limit is given, only the limit highest-ranked non-deprecated
        subjects are considered. Hits must have a score of at least
        threshold (and above zero) to be retained."""
        # inclusive comparison, matching ListSuggestionResult.filter();
        # zero scores that pass here are still dropped by as_list() below
        mask = (self._vector >= threshold)
        deprecated_ids = subject_index.deprecated_ids()
        if limit is not None:
            limit_mask = np.zeros_like(self._vector, dtype=bool)
            deprecated_set = set(deprecated_ids)
            top_k_subjects = itertools.islice(
                                (subj for subj in self.subject_order
                                 if subj not in deprecated_set), limit)
            limit_mask[list(top_k_subjects)] = True
            mask = mask & limit_mask
        else:
            deprecated_mask = np.ones_like(self._vector, dtype=bool)
            deprecated_mask[deprecated_ids] = False
            mask = mask & deprecated_mask
        # multiplying by the boolean mask zeroes the rejected scores
        vsr = VectorSuggestionResult(self._vector * mask)
        return ListSuggestionResult(vsr.as_list())

    def __len__(self):
        """Return the number of hits with a score above zero, as an int."""
        return int(np.count_nonzero(self._vector > 0.0))


class ListSuggestionResult(SuggestionResult):

    """SuggestionResult implementation based primarily on lists of hits."""

    def __init__(self, hits):
        """Store the given hits, dropping those whose score is not above
        zero and capping scores at 1.0."""
        self._list = [self._enforce_score_range(hit)
                      for hit in hits
                      if hit.score > 0.0]
        # cached vector, only for calls made without a destination
        self._vector = None

    @staticmethod
    def _enforce_score_range(hit):
        """Return the hit with its score capped at 1.0."""
        if hit.score > 1.0:
            return hit._replace(score=1.0)
        return hit

    def _list_to_vector(self, size, destination):
        """Write the scores of the hits into a vector indexed by subject ID.

        A new zero-filled float32 array of the given size is created when
        destination is None. Only positions of hits are written; other
        elements of a supplied destination are left as they are. Hits
        whose subject_id is None are skipped."""
        if destination is None:
            destination = np.zeros(size, dtype=np.float32)

        for hit in self._list:
            if hit.subject_id is not None:
                destination[hit.subject_id] = hit.score
        return destination

    def as_list(self):
        """Return the stored list of hits (the list itself, not a copy)."""
        return self._list

    def as_vector(self, size, destination=None):
        """Return the hits as a one-dimensional score vector of given size.

        If destination is given, the scores are always written into it and
        it is returned. Otherwise an internally cached array is returned;
        it is rebuilt when the requested size differs from the cached one."""
        if destination is not None:
            # Never cache the caller's buffer: doing so would alias it and
            # leave a different destination unfilled on a later call.
            return self._list_to_vector(size, destination)
        if self._vector is None or len(self._vector) != size:
            self._vector = self._list_to_vector(size, None)
        return self._vector

    def filter(self, subject_index, limit=None, threshold=0.0):
        """Return a new ListSuggestionResult with the hits that pass.

        Hits are sorted by descending score; those with a score below
        threshold, a score not above zero, or a subject_id of None are
        removed, and at most limit hits are kept when limit is given.
        The subject_index argument is not used by this implementation."""
        hits = sorted(self._list, key=lambda hit: hit.score, reverse=True)
        filtered_hits = [hit for hit in hits
                         if hit.score >= threshold and hit.score > 0.0 and
                         hit.subject_id is not None]
        if limit is not None:
            filtered_hits = filtered_hits[:limit]
        return ListSuggestionResult(filtered_hits)

    def __len__(self):
        """Return the number of stored hits."""
        return len(self._list)


1		"""Representing suggested subjects."""
2
3		import abc
4		import collections
5		import itertools
6		import numpy as np
7
8
9		SubjectSuggestion = collections.namedtuple(
10		'SubjectSuggestion', 'subject_id score')
11		WeightedSuggestion = collections.namedtuple(
12		'WeightedSuggestion', 'hits weight subjects')
13
14
15		class SuggestionFilter:
16		"""A reusable filter for filtering SubjectSuggestion objects."""
17
18		def __init__(self, subject_index, limit=None, threshold=0.0):
19		self._subject_index = subject_index
20		self._limit = limit
21		self._threshold = threshold
22
23		def __call__(self, orighits):
24		return LazySuggestionResult(
25		lambda: orighits.filter(self._subject_index,
26		self._limit,
27		self._threshold))
28
29
30	View Code Duplication	class SuggestionResult(metaclass=abc.ABCMeta):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
31		"""Abstract base class for a set of hits returned by an analysis
32		operation."""
33
34		@abc.abstractmethod
35		def as_list(self):
36		"""Return the hits as an ordered sequence of SubjectSuggestion objects,
37		highest scores first."""
38		pass # pragma: no cover
39
40		@abc.abstractmethod
41		def as_vector(self, size, destination=None):
42		"""Return the hits as a one-dimensional score vector of given size.
43		If destination array is given (not None) it will be used, otherwise a
44		new array will be created."""
45		pass # pragma: no cover
46
47		@abc.abstractmethod
48		def filter(self, subject_index, limit=None, threshold=0.0):
49		"""Return a subset of the hits, filtered by the given limit and
50		score threshold, as another SuggestionResult object."""
51		pass # pragma: no cover
52
53		@abc.abstractmethod
54		def __len__(self):
55		"""Return the number of hits with non-zero scores."""
56		pass # pragma: no cover
57
58
59	View Code Duplication	class LazySuggestionResult(SuggestionResult):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
60		"""SuggestionResult implementation that wraps another SuggestionResult which
61		is initialized lazily only when it is actually accessed. Method calls
62		will be proxied to the wrapped SuggestionResult."""
63
64		def __init__(self, construct):
65		"""Create the proxy object. The given construct function will be
66		called to create the actual SuggestionResult when it is needed."""
67		self._construct = construct
68		self._object = None
69
70		def _initialize(self):
71		if self._object is None:
72		self._object = self._construct()
73
74		def as_list(self):
75		self._initialize()
76		return self._object.as_list()
77
78		def as_vector(self, size, destination=None):
79		self._initialize()
80		return self._object.as_vector(size, destination)
81
82		def filter(self, subject_index, limit=None, threshold=0.0):
83		self._initialize()
84		return self._object.filter(subject_index, limit, threshold)
85
86		def __len__(self):
87		self._initialize()
88		return len(self._object)
89
90
91	View Code Duplication	class VectorSuggestionResult(SuggestionResult):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
92		"""SuggestionResult implementation based primarily on NumPy vectors."""
93
94		def __init__(self, vector):
95		vector_f32 = vector.astype(np.float32)
96		# limit scores to the range 0.0 .. 1.0
97		self._vector = np.minimum(np.maximum(vector_f32, 0.0), 1.0)
98		self._subject_order = None
99		self._lsr = None
100
101		def _vector_to_list_suggestion(self):
102		hits = []
103		for subject_id in self.subject_order:
104		score = self._vector[subject_id]
105		if score <= 0.0:
106		break # we can skip the remaining ones
107		hits.append(
108		SubjectSuggestion(
109		subject_id=subject_id,
110		score=float(score)))
111		return ListSuggestionResult(hits)
112
113		@property
114		def subject_order(self):
115		if self._subject_order is None:
116		self._subject_order = np.argsort(self._vector)[::-1]
117		return self._subject_order
118
119		def as_list(self):
120		if self._lsr is None:
121		self._lsr = self._vector_to_list_suggestion()
122		return self._lsr.as_list()
123
124		def as_vector(self, size, destination=None):
125		if destination is not None:
126		np.copyto(destination, self._vector)
127		return destination
128		return self._vector
129
130		def filter(self, subject_index, limit=None, threshold=0.0):
131		mask = (self._vector > threshold)
132		deprecated_ids = subject_index.deprecated_ids()
133		if limit is not None:
134		limit_mask = np.zeros_like(self._vector, dtype=bool)
135		deprecated_set = set(deprecated_ids)
136		top_k_subjects = itertools.islice(
137		(subj for subj in self.subject_order
138		if subj not in deprecated_set), limit)
139		limit_mask[list(top_k_subjects)] = True
140		mask = mask & limit_mask
141		else:
142		deprecated_mask = np.ones_like(self._vector, dtype=bool)
143		deprecated_mask[deprecated_ids] = False
144		mask = mask & deprecated_mask
145		vsr = VectorSuggestionResult(self._vector * mask)
146		return ListSuggestionResult(vsr.as_list())
147
148		def __len__(self):
149		return (self._vector > 0.0).sum()
150
151
152	View Code Duplication	class ListSuggestionResult(SuggestionResult):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
153		"""SuggestionResult implementation based primarily on lists of hits."""
154
155		def __init__(self, hits):
156		self._list = [self._enforce_score_range(hit)
157		for hit in hits
158		if hit.score > 0.0]
159		self._vector = None
160
161		@staticmethod
162		def _enforce_score_range(hit):
163		if hit.score > 1.0:
164		return hit._replace(score=1.0)
165		return hit
166
167		def _list_to_vector(self, size, destination):
168		if destination is None:
169		destination = np.zeros(size, dtype=np.float32)
170
171		for hit in self._list:
172		if hit.subject_id is not None:
173		destination[hit.subject_id] = hit.score
174		return destination
175
176		def as_list(self):
177		return self._list
178
179		def as_vector(self, size, destination=None):
180		if self._vector is None:
181		self._vector = self._list_to_vector(size, destination)
182		return self._vector
183
184		def filter(self, subject_index, limit=None, threshold=0.0):
185		hits = sorted(self._list, key=lambda hit: hit.score, reverse=True)
186		filtered_hits = [hit for hit in hits
187		if hit.score >= threshold and hit.score > 0.0 and
188		hit.subject_id is not None]
189		if limit is not None:
190		filtered_hits = filtered_hits[:limit]
191		return ListSuggestionResult(filtered_hits)
192
193		def __len__(self):
194		return len(self._list)
195

NatLibFi / Annif

Pull Request — master (#604)

annif.suggestion A

Complexity

Size/Duplication

Importance

26 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complexity

Duplication Side-by-Side

Filter issues like