abydos.distance._vps.VPS.sim() - Code Metrics - Inspection of "0.4.1" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#225)

by Chris

created 2019-07-12 04:59 UTC

abydos.distance._vps.VPS.sim() C

↳ Parent: abydos.distance._vps

Complexity

Conditions

Size

Total Lines	62
Code Lines	26

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	24
CRAP Score	11

Importance

Changes

Metric	Value
eloc	26
dl	0
loc	62
ccs	24
cts	24
cp	1
rs	5.4
c	0
b	0
f	0
cc	11
nop	3
crap	11

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2019 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._vps.

Victorian Panel Study (VPS) score
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from collections import defaultdict

from ._distance import _Distance

__all__ = ['VPS']


class VPS(_Distance):
    """Victorian Panel Study (VPS) score.

    The VPS score is presented in :cite:`Shurer:2007`. As implemented here,
    it compares the positions of every substring of length 1, 2, and 3 that
    the two strings have in common.

    .. versionadded:: 0.4.1
    """

    def sim(self, src, tar):
        """Return the Victorian Panel Study score of two words.

        Every substring of length 1 to 3 of the longer word that also occurs
        in the shorter word earns one point per starting position. When that
        position is not shared by the shorter word, the distance to the
        nearest occurrence is accumulated as a discount. The total discount
        is divided by the longer word's length and subtracted from the
        points, and a non-zero remainder is divided by ``3 * len - 3``,
        where ``len`` is the length of the longer word.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            The VPS score

        Examples
        --------
        >>> cmp = VPS()
        >>> cmp.sim('cat', 'hat')
        0.5
        >>> cmp.sim('Niall', 'Neil')
        0.3
        >>> cmp.sim('aluminum', 'Catalan')
        0.14285714285714285
        >>> cmp.sim('ATCG', 'TAGC')
        0.3333333333333333


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0

        # Work with the longer word as the source from here on.
        if len(tar) > len(src):
            src, tar = tar, src
        longest = len(src)

        # Map each substring of width 1..3 to the set of its start offsets.
        src_index = defaultdict(set)
        tar_index = defaultdict(set)
        for word, index in ((src, src_index), (tar, tar_index)):
            for width in (1, 2, 3):
                for start in range(len(word) - width + 1):
                    index[word[start : start + width]].add(start)

        matches = 0
        penalty = 0
        for gram, src_starts in src_index.items():
            if gram not in tar_index:
                continue
            tar_starts = tar_index[gram]
            # One point for each place the shared substring starts in src.
            matches += len(src_starts)
            # Misaligned occurrences cost the distance to the nearest
            # occurrence in tar.
            penalty += sum(
                min(abs(pos - other) for other in tar_starts)
                for pos in src_starts
                if pos not in tar_starts
            )

        result = matches - penalty / longest
        # A zero score is returned as-is; this also avoids dividing by zero
        # when the longer word has a single character.
        if result:
            result /= 3 * longest - 3

        return result


# Run this module's doctests when it is executed directly as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2019 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._vps.
20
21		Victorian Panel Study (VPS) score
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from collections import defaultdict
32
33	1	from ._distance import _Distance
34
35	1	__all__ = ['VPS']
36
37
38	1	class VPS(_Distance):
39		"""Victorian Panel Study (VPS) score.
40
41		VPS score is presented in :cite:`Shurer:2007`.
42
43		.. versionadded:: 0.4.1
44		"""
45
46	1	def sim(self, src, tar):
47		"""Return the Victorian Panel Study score of two words.
48
49		Parameters
50		----------
51		src : str
52		Source string for comparison
53		tar : str
54		Target string for comparison
55
56		Returns
57		-------
58		float
59		The VPS score
60
61		Examples
62		--------
63		>>> cmp = VPS()
64		>>> cmp.sim('cat', 'hat')
65		0.5
66		>>> cmp.sim('Niall', 'Neil')
67		0.3
68		>>> cmp.sim('aluminum', 'Catalan')
69		0.14285714285714285
70		>>> cmp.sim('ATCG', 'TAGC')
71		0.3333333333333333
72
73
74		.. versionadded:: 0.4.1
75
76		"""
77	1	if src == tar:
78	1	return 1.0
79	1	if len(src) < len(tar):
80	1	src, tar = tar, src
81
82	1	score = 0
83	1	discount = 0
84
85	1	src_tokens = defaultdict(set)
86	1	tar_tokens = defaultdict(set)
87	1	for slen in range(1, 4):
88	1	for i in range(len(src) - slen + 1):
89	1	src_tokens[src[i : i + slen]].add(i)
90	1	for i in range(len(tar) - slen + 1):
91	1	tar_tokens[tar[i : i + slen]].add(i)
92
93	1	for token in src_tokens.keys():
94	1	if token in tar_tokens:
95	1	for src_pos in src_tokens[token]:
96	1	score += 1
97	1	if src_pos not in tar_tokens[token]:
98	1	discount += min(
99		abs(src_pos - tar_pos)
100		for tar_pos in tar_tokens[token]
101		)
102
103	1	score -= discount / max(len(src), len(tar))
104	1	if score:
105	1	score /= 3 * len(src) - 3
106
107	1	return score
108
109
110		if __name__ == '__main__':
111		import doctest
112
113		doctest.testmod()
114

chrislit / abydos

Pull Request — master (#225)

abydos.distance._vps.VPS.sim() C

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like