abydos.distance._mra.mra_compare() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.distance._mra.mra_compare() A

↳ Parent: abydos.distance._mra

Complexity

Conditions

Size

Total Lines	30
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	2
dl	0
loc	30
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._mra.

The Match Rating Algorithm's distance measure
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from six.moves import range

from ._distance import _Distance
from ..phonetic import mra

__all__ = ['MRA', 'dist_mra', 'mra_compare', 'sim_mra']


class MRA(_Distance):
    """Match Rating Algorithm comparison rating.

    The Western Airlines Surname Match Rating Algorithm comparison rating, as
    presented on page 18 of :cite:`Moore:1977`.
    """

    def dist_abs(self, src, tar):
        """Return the MRA comparison rating of two strings.

        Identical strings rate 6 and a comparison against an empty string
        rates 0, both without encoding. Otherwise the strings are encoded
        with :py:func:`mra`, position-wise matching elements are removed in
        two passes (left-to-right, then right-to-left), and the rating is 6
        minus the length of the longer remainder. Ratings below a minimum
        that depends on the combined length of the encodings are reported
        as 0.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        int
            MRA comparison rating

        Examples
        --------
        >>> cmp = MRA()
        >>> cmp.dist_abs('cat', 'hat')
        5
        >>> cmp.dist_abs('Niall', 'Neil')
        6
        >>> cmp.dist_abs('aluminum', 'Catalan')
        0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        5

        """
        # Identical raw strings get the top rating without being encoded.
        if src == tar:
            return 6
        # Exactly one empty string (both empty was handled above): no match.
        if src == '' or tar == '':
            return 0

        # Work on the MRA encodings as mutable lists of their elements.
        src = list(mra(src))
        tar = list(mra(tar))

        # Encodings whose lengths differ by more than 2 are never compared.
        if abs(len(src) - len(tar)) > 2:
            return 0

        # The minimum acceptable rating drops as the combined length grows.
        length_sum = len(src) + len(tar)
        if length_sum < 5:
            min_rating = 5
        elif length_sum < 8:
            min_rating = 4
        elif length_sum < 12:
            min_rating = 3
        else:
            min_rating = 2

        # Two passes: the first drops position-wise matches scanning from the
        # left; the reversal at the end of the pass makes the second pass
        # scan from the right. The second reversal restores the order.
        for _ in range(2):
            new_src = []
            new_tar = []
            minlen = min(len(src), len(tar))
            for i in range(minlen):
                if src[i] != tar[i]:
                    new_src.append(src[i])
                    new_tar.append(tar[i])
            # Elements past the shorter list have no counterpart; keep them.
            src = new_src + src[minlen:]
            tar = new_tar + tar[minlen:]
            src.reverse()
            tar.reverse()

        # Rating is 6 minus the number of unmatched elements in the longer
        # remainder.
        similarity = 6 - max(len(src), len(tar))

        if similarity >= min_rating:
            return similarity
        return 0

    def sim(self, src, tar):
        """Return the normalized MRA similarity of two strings.

        This is the MRA normalized to :math:`[0, 1]`, given that MRA itself is
        constrained to the range :math:`[0, 6]`.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized MRA similarity

        Examples
        --------
        >>> cmp = MRA()
        >>> cmp.sim('cat', 'hat')
        0.8333333333333334
        >>> cmp.sim('Niall', 'Neil')
        1.0
        >>> cmp.sim('aluminum', 'Catalan')
        0.0
        >>> cmp.sim('ATCG', 'TAGC')
        0.8333333333333334

        """
        # Call dist_abs on this instance (not the module-level wrapper) so
        # that no throwaway MRA object is built and subclass overrides of
        # dist_abs are honoured. The module's `from __future__ import
        # division` makes `/` true division on Python 2 as well.
        return self.dist_abs(src, tar) / 6


def mra_compare(src, tar):
    """Compute the Match Rating Algorithm comparison rating of two strings.

    Convenience function: builds an :py:class:`MRA` object and delegates to
    :py:meth:`MRA.dist_abs`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    int
        MRA comparison rating

    Examples
    --------
    >>> mra_compare('cat', 'hat')
    5
    >>> mra_compare('Niall', 'Neil')
    6
    >>> mra_compare('aluminum', 'Catalan')
    0
    >>> mra_compare('ATCG', 'TAGC')
    5

    """
    comparator = MRA()
    return comparator.dist_abs(src, tar)


def sim_mra(src, tar):
    """Compute the normalized MRA similarity of two strings.

    Convenience function: builds an :py:class:`MRA` object and delegates to
    :py:meth:`MRA.sim`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized MRA similarity

    Examples
    --------
    >>> sim_mra('cat', 'hat')
    0.8333333333333334
    >>> sim_mra('Niall', 'Neil')
    1.0
    >>> sim_mra('aluminum', 'Catalan')
    0.0
    >>> sim_mra('ATCG', 'TAGC')
    0.8333333333333334

    """
    comparator = MRA()
    return comparator.sim(src, tar)


def dist_mra(src, tar):
    """Compute the normalized MRA distance between two strings.

    Convenience function: builds an :py:class:`MRA` object and delegates to
    :py:meth:`MRA.dist`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized MRA distance

    Examples
    --------
    >>> dist_mra('cat', 'hat')
    0.16666666666666663
    >>> dist_mra('Niall', 'Neil')
    0.0
    >>> dist_mra('aluminum', 'Catalan')
    1.0
    >>> dist_mra('ATCG', 'TAGC')
    0.16666666666666663

    """
    comparator = MRA()
    return comparator.dist(src, tar)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._mra.
20
21		The Match Rating Algorithm's distance measure
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from six.moves import range
32
33	1	from ._distance import _Distance
34	1	from ..phonetic import mra
35
36	1	__all__ = ['MRA', 'dist_mra', 'mra_compare', 'sim_mra']
37
38
39	1	class MRA(_Distance):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
40		"""Match Rating Algorithm comparison rating.
41
42		The Western Airlines Surname Match Rating Algorithm comparison rating, as
43		presented on page 18 of :cite:`Moore:1977`.
44		"""
45
46	1	def dist_abs(self, src, tar):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'dist_abs' method Loading history...
47		"""Return the MRA comparison rating of two strings.
48
49		Parameters
50		----------
51		src : str
52		Source string for comparison
53		tar : str
54		Target string for comparison
55
56		Returns
57		-------
58		int
59		MRA comparison rating
60
61		Examples
62		--------
63		>>> cmp = MRA()
64		>>> cmp.dist_abs('cat', 'hat')
65		5
66		>>> cmp.dist_abs('Niall', 'Neil')
67		6
68		>>> cmp.dist_abs('aluminum', 'Catalan')
69		0
70		>>> cmp.dist_abs('ATCG', 'TAGC')
71		5
72
73		"""
74	1	if src == tar:
75	1	return 6
76	1	if src == '' or tar == '':
77	1	return 0
78	1	src = list(mra(src))
79	1	tar = list(mra(tar))
80
81	1	if abs(len(src) - len(tar)) > 2:
82	1	return 0
83
84	1	length_sum = len(src) + len(tar)
85	1	if length_sum < 5:
86	1	min_rating = 5
87	1	elif length_sum < 8:
88	1	min_rating = 4
89	1	elif length_sum < 12:
90	1	min_rating = 3
91		else:
92	1	min_rating = 2
93
94	1	for _ in range(2):
95	1	new_src = []
96	1	new_tar = []
97	1	minlen = min(len(src), len(tar))
98	1	for i in range(minlen):
99	1	if src[i] != tar[i]:
100	1	new_src.append(src[i])
101	1	new_tar.append(tar[i])
102	1	src = new_src + src[minlen:]
103	1	tar = new_tar + tar[minlen:]
104	1	src.reverse()
105	1	tar.reverse()
106
107	1	similarity = 6 - max(len(src), len(tar))
108
109	1	if similarity >= min_rating:
110	1	return similarity
111	1	return 0
112
113	1	def sim(self, src, tar):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'sim' method Loading history...
114		"""Return the normalized MRA similarity of two strings.
115
116		This is the MRA normalized to :math:`[0, 1]`, given that MRA itself is
117		constrained to the range :math:`[0, 6]`.
118
119		Parameters
120		----------
121		src : str
122		Source string for comparison
123		tar : str
124		Target string for comparison
125
126		Returns
127		-------
128		float
129		Normalized MRA similarity
130
131		Examples
132		--------
133		>>> cmp = MRA()
134		>>> cmp.sim('cat', 'hat')
135		0.8333333333333334
136		>>> cmp.sim('Niall', 'Neil')
137		1.0
138		>>> cmp.sim('aluminum', 'Catalan')
139		0.0
140		>>> cmp.sim('ATCG', 'TAGC')
141		0.8333333333333334
142
143		"""
144	1	return mra_compare(src, tar) / 6
145
146
147	1	def mra_compare(src, tar):
148		"""Return the MRA comparison rating of two strings.
149
150		This is a wrapper for :py:meth:`MRA.dist_abs`.
151
152		Parameters
153		----------
154		src : str
155		Source string for comparison
156		tar : str
157		Target string for comparison
158
159		Returns
160		-------
161		int
162		MRA comparison rating
163
164		Examples
165		--------
166		>>> mra_compare('cat', 'hat')
167		5
168		>>> mra_compare('Niall', 'Neil')
169		6
170		>>> mra_compare('aluminum', 'Catalan')
171		0
172		>>> mra_compare('ATCG', 'TAGC')
173		5
174
175		"""
176	1	return MRA().dist_abs(src, tar)
177
178
179	1	def sim_mra(src, tar):
180		"""Return the normalized MRA similarity of two strings.
181
182		This is a wrapper for :py:meth:`MRA.sim`.
183
184		Parameters
185		----------
186		src : str
187		Source string for comparison
188		tar : str
189		Target string for comparison
190
191		Returns
192		-------
193		float
194		Normalized MRA similarity
195
196		Examples
197		--------
198		>>> sim_mra('cat', 'hat')
199		0.8333333333333334
200		>>> sim_mra('Niall', 'Neil')
201		1.0
202		>>> sim_mra('aluminum', 'Catalan')
203		0.0
204		>>> sim_mra('ATCG', 'TAGC')
205		0.8333333333333334
206
207		"""
208	1	return MRA().sim(src, tar)
209
210
211	1	def dist_mra(src, tar):
212		"""Return the normalized MRA distance between two strings.
213
214		This is a wrapper for :py:meth:`MRA.dist`.
215
216		Parameters
217		----------
218		src : str
219		Source string for comparison
220		tar : str
221		Target string for comparison
222
223		Returns
224		-------
225		float
226		Normalized MRA distance
227
228		Examples
229		--------
230		>>> dist_mra('cat', 'hat')
231		0.16666666666666663
232		>>> dist_mra('Niall', 'Neil')
233		0.0
234		>>> dist_mra('aluminum', 'Catalan')
235		1.0
236		>>> dist_mra('ATCG', 'TAGC')
237		0.16666666666666663
238
239		"""
240	1	return MRA().dist(src, tar)
241
242
243		if __name__ == '__main__':
244		import doctest
245
246		doctest.testmod()
247

chrislit / abydos

Push — master ( f43547...71985b )

abydos.distance._mra.mra_compare() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like