abydos.distance._sift4_simplest.Sift4Simplest.dist_abs() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

Sift4Simplest.dist_abs() F

↳ Parent: abydos.distance._sift4_simplest

Complexity

Conditions

Size

Total Lines	75
Code Lines	36

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	33
CRAP Score	14

Importance

Changes

Metric	Value
cc	14
eloc	36
nop	4
dl	0
loc	75
ccs	33
cts	33
cp	1
crap	14
rs	3.6
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._sift4_simplest.

Sift4 Simplest approximate string distance
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from six.moves import range

from ._sift4 import Sift4

__all__ = ['Sift4Simplest', 'sift4_simplest']


class Sift4Simplest(Sift4):
    """Sift4 Simplest version.

    This is an approximation of edit distance, described in
    :cite:`Zackwehdex:2014`.
    """

    def dist_abs(self, src, tar, max_offset=5):
        """Return the "simplest" Sift4 distance between two terms.

        Both strings are walked with one cursor each. Runs of matching
        characters are accumulated into an approximate longest common
        subsequence length; on a mismatch, up to ``max_offset`` positions
        ahead are searched in either string for a character that lets the
        walk resume. The distance is the length of the longer string minus
        the accumulated match count.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison
        max_offset : int
            The number of characters to search for matching letters

        Returns
        -------
        int
            The Sift4 distance according to the simplest formula

        Examples
        --------
        >>> cmp = Sift4Simplest()
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        2
        >>> cmp.dist_abs('Colin', 'Cuilen')
        3
        >>> cmp.dist_abs('ATCG', 'TAGC')
        2
        >>> cmp.dist_abs('abc', 'xxxabyyyy')
        7

        """
        # If either string is empty, the distance is the other's length.
        if not src:
            return len(tar)

        if not tar:
            return len(src)

        src_len = len(src)
        tar_len = len(tar)

        src_cur = 0  # cursor into src
        tar_cur = 0  # cursor into tar
        lcss = 0  # total matched characters from completed runs
        local_cs = 0  # length of the run of matches currently in progress

        while (src_cur < src_len) and (tar_cur < tar_len):
            if src[src_cur] == tar[tar_cur]:
                local_cs += 1
            else:
                # The current run is over: bank it and start a new one.
                lcss += local_cs
                local_cs = 0
                if src_cur != tar_cur:
                    # Re-align both cursors on the one that is further ahead.
                    src_cur = tar_cur = max(src_cur, tar_cur)
                    if (src_cur >= src_len) or (tar_cur >= tar_len):
                        # Re-aligning moved a cursor past the end of the
                        # shorter string. Nothing can match from here, and
                        # indexing at that cursor would raise IndexError,
                        # so stop scanning.
                        break
                for i in range(max_offset):
                    # Stop once the look-ahead has run off both strings.
                    if not (
                        (src_cur + i < src_len) or (tar_cur + i < tar_len)
                    ):
                        break
                    # Look ahead in src for the current tar character.
                    if (src_cur + i < src_len) and (
                        src[src_cur + i] == tar[tar_cur]
                    ):
                        src_cur += i
                        local_cs += 1
                        break
                    # Look ahead in tar for the current src character.
                    if (tar_cur + i < tar_len) and (
                        src[src_cur] == tar[tar_cur + i]
                    ):
                        tar_cur += i
                        local_cs += 1
                        break

            src_cur += 1
            tar_cur += 1

        # Bank the run that was still in progress when the walk ended.
        lcss += local_cs
        # Both operands are ints, so no rounding is needed; this keeps the
        # result an int regardless of how round() behaves on the interpreter.
        return max(src_len, tar_len) - lcss


def sift4_simplest(src, tar, max_offset=5):
    """Compute the "simplest" Sift4 distance between two strings.

    Convenience function that creates a :py:class:`Sift4Simplest` instance
    and delegates to :py:meth:`Sift4Simplest.dist_abs`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison
    max_offset : int
        The number of characters to search for matching letters

    Returns
    -------
    int
        The Sift4 distance according to the simplest formula

    Examples
    --------
    >>> sift4_simplest('cat', 'hat')
    1
    >>> sift4_simplest('Niall', 'Neil')
    2
    >>> sift4_simplest('Colin', 'Cuilen')
    3
    >>> sift4_simplest('ATCG', 'TAGC')
    2

    """
    measure = Sift4Simplest()
    distance = measure.dist_abs(src, tar, max_offset)
    return distance


if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._sift4_simplest.
20
21		Sift4 Simplest approximate string distance
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from six.moves import range
32
33	1	from ._sift4 import Sift4
34
35	1	__all__ = ['Sift4Simplest', 'sift4_simplest']
36
37
38	1	class Sift4Simplest(Sift4):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
39		"""Sift4 Simplest version.
40
41		This is an approximation of edit distance, described in
42		:cite:`Zackwehdex:2014`.
43		"""
44
45	1	def dist_abs(self, src, tar, max_offset=5):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'dist_abs' method Loading history...
46		"""Return the "simplest" Sift4 distance between two terms.
47
48		Parameters
49		----------
50		src : str
51		Source string for comparison
52		tar : str
53		Target string for comparison
54		max_offset : int
55		The number of characters to search for matching letters
56
57		Returns
58		-------
59		int
60		The Sift4 distance according to the simplest formula
61
62		Examples
63		--------
64		>>> cmp = Sift4Simplest()
65		>>> cmp.dist_abs('cat', 'hat')
66		1
67		>>> cmp.dist_abs('Niall', 'Neil')
68		2
69		>>> cmp.dist_abs('Colin', 'Cuilen')
70		3
71		>>> cmp.dist_abs('ATCG', 'TAGC')
72		2
73
74		"""
75	1	if not src:
76	1	return len(tar)
77
78	1	if not tar:
79	1	return len(src)
80
81	1	src_len = len(src)
82	1	tar_len = len(tar)
83
84	1	src_cur = 0
85	1	tar_cur = 0
86	1	lcss = 0
87	1	local_cs = 0
88
89	1	while (src_cur < src_len) and (tar_cur < tar_len):
90	1	if src[src_cur] == tar[tar_cur]:
91	1	local_cs += 1
92		else:
93	1	lcss += local_cs
94	1	local_cs = 0
95	1	if src_cur != tar_cur:
96	1	src_cur = tar_cur = max(src_cur, tar_cur)
97	1	for i in range(max_offset):
98	1	if not (
99		(src_cur + i < src_len) or (tar_cur + i < tar_len)
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
100		):
101	1	break
102	1	if (src_cur + i < src_len) and (
103		src[src_cur + i] == tar[tar_cur]
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
104		):
105	1	src_cur += i
106	1	local_cs += 1
107	1	break
108	1	if (tar_cur + i < tar_len) and (
109		src[src_cur] == tar[tar_cur + i]
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
110		):
111	1	tar_cur += i
112	1	local_cs += 1
113	1	break
114
115	1	src_cur += 1
116	1	tar_cur += 1
117
118	1	lcss += local_cs
119	1	return round(max(src_len, tar_len) - lcss)
120
121
122	1	def sift4_simplest(src, tar, max_offset=5):
123		"""Return the "simplest" Sift4 distance between two terms.
124
125		This is a wrapper for :py:meth:`Sift4Simplest.dist_abs`.
126
127		Parameters
128		----------
129		src : str
130		Source string for comparison
131		tar : str
132		Target string for comparison
133		max_offset : int
134		The number of characters to search for matching letters
135
136		Returns
137		-------
138		int
139		The Sift4 distance according to the simplest formula
140
141		Examples
142		--------
143		>>> sift4_simplest('cat', 'hat')
144		1
145		>>> sift4_simplest('Niall', 'Neil')
146		2
147		>>> sift4_simplest('Colin', 'Cuilen')
148		3
149		>>> sift4_simplest('ATCG', 'TAGC')
150		2
151
152		"""
153	1	return Sift4Simplest().dist_abs(src, tar, max_offset)
154
155
156		if __name__ == '__main__':
157		import doctest
158
159		doctest.testmod()
160

chrislit / abydos

Push — master ( f43547...71985b )

Sift4Simplest.dist_abs() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like