Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.distance._mra   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 247
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 55
dl 0
loc 247
ccs 48
cts 48
cp 1
rs 10
c 0
b 0
f 0
wmc 16

2 Methods

Rating   Name   Duplication   Size   Complexity  
D MRA.dist_abs() 0 66 12
A MRA.sim() 0 32 1

3 Functions

Rating   Name   Duplication   Size   Complexity  
A sim_mra() 0 30 1
A dist_mra() 0 30 1
A mra_compare() 0 30 1
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._mra.
20
21
The Match Rating Algorithm's distance measure
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from six.moves import range
32
33 1
from ._distance import _Distance
34 1
from ..phonetic import mra
35
36 1
# Public API: the names exported by a star-import of this module.
__all__ = ['MRA', 'dist_mra', 'mra_compare', 'sim_mra']
37
38
39 1
class MRA(_Distance):
    """Match Rating Algorithm comparison rating.

    The Western Airlines Surname Match Rating Algorithm comparison rating, as
    presented on page 18 of :cite:`Moore:1977`.
    """

    # NOTE(review): static analysis reported "Parameters differ from
    # overridden 'dist_abs' method"; the base class is not visible here, so
    # confirm the signature against _Distance.dist_abs.
    def dist_abs(self, src, tar):
        """Return the MRA comparison rating of two strings.

        Despite the method name, a higher value means a closer match: the
        result is an integer from 0 (no match) to 6 (best match).

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        int
            MRA comparison rating

        Examples
        --------
        >>> cmp = MRA()
        >>> cmp.dist_abs('cat', 'hat')
        5
        >>> cmp.dist_abs('Niall', 'Neil')
        6
        >>> cmp.dist_abs('aluminum', 'Catalan')
        0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        5

        """
        # Identical inputs (including two empty strings) get the top rating.
        if src == tar:
            return 6
        if src == '' or tar == '':
            return 0

        # From here on, compare the MRA encodings rather than the raw strings.
        src = list(mra(src))
        tar = list(mra(tar))

        # Encodings whose lengths differ by more than 2 are never a match.
        if abs(len(src) - len(tar)) > 2:
            return 0

        # The minimum acceptable rating depends on the combined code length.
        length_sum = len(src) + len(tar)
        if length_sum < 5:
            min_rating = 5
        elif length_sum < 8:
            min_rating = 4
        elif length_sum < 12:
            min_rating = 3
        else:
            min_rating = 2

        # Two elimination passes: drop characters that are identical at the
        # same position, keep the unpaired tail of the longer code, then
        # reverse both codes so that the second pass aligns them from the
        # opposite end.
        for _ in range(2):
            minlen = min(len(src), len(tar))
            mismatched = [
                (src_ch, tar_ch)
                for src_ch, tar_ch in zip(src, tar)
                if src_ch != tar_ch
            ]
            src = [pair[0] for pair in mismatched] + src[minlen:]
            tar = [pair[1] for pair in mismatched] + tar[minlen:]
            src.reverse()
            tar.reverse()

        # The rating is 6 minus the number of characters left unmatched in
        # the longer remainder.
        similarity = 6 - max(len(src), len(tar))

        if similarity >= min_rating:
            return similarity
        return 0

    # NOTE(review): static analysis reported "Parameters differ from
    # overridden 'sim' method"; confirm the signature against _Distance.sim.
    def sim(self, src, tar):
        """Return the normalized MRA similarity of two strings.

        This is the MRA normalized to :math:`[0, 1]`, given that MRA itself is
        constrained to the range :math:`[0, 6]`.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized MRA similarity

        Examples
        --------
        >>> cmp = MRA()
        >>> cmp.sim('cat', 'hat')
        0.8333333333333334
        >>> cmp.sim('Niall', 'Neil')
        1.0
        >>> cmp.sim('aluminum', 'Catalan')
        0.0
        >>> cmp.sim('ATCG', 'TAGC')
        0.8333333333333334

        """
        # Call dist_abs on this instance (not via the module-level wrapper)
        # so that subclass overrides are honoured. True division is in effect
        # through the module's ``from __future__ import division``.
        return self.dist_abs(src, tar) / 6
145
146
147 1
def mra_compare(src, tar):
    """Return the MRA comparison rating of two strings.

    This is a wrapper for :py:meth:`MRA.dist_abs`; it builds a fresh
    :py:class:`MRA` instance for each call.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    int
        MRA comparison rating

    Examples
    --------
    >>> mra_compare('cat', 'hat')
    5
    >>> mra_compare('Niall', 'Neil')
    6
    >>> mra_compare('aluminum', 'Catalan')
    0
    >>> mra_compare('ATCG', 'TAGC')
    5

    """
    return MRA().dist_abs(src, tar)
177
178
179 1
def sim_mra(src, tar):
    """Return the normalized MRA similarity of two strings.

    This is a wrapper for :py:meth:`MRA.sim`; it builds a fresh
    :py:class:`MRA` instance for each call.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized MRA similarity

    Examples
    --------
    >>> sim_mra('cat', 'hat')
    0.8333333333333334
    >>> sim_mra('Niall', 'Neil')
    1.0
    >>> sim_mra('aluminum', 'Catalan')
    0.0
    >>> sim_mra('ATCG', 'TAGC')
    0.8333333333333334

    """
    return MRA().sim(src, tar)
209
210
211 1
def dist_mra(src, tar):
    """Return the normalized MRA distance between two strings.

    This is a wrapper for :py:meth:`MRA.dist`, which is not defined in this
    module (presumably inherited from ``_Distance`` -- confirm there). A
    fresh :py:class:`MRA` instance is built for each call.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized MRA distance

    Examples
    --------
    >>> dist_mra('cat', 'hat')
    0.16666666666666663
    >>> dist_mra('Niall', 'Neil')
    0.0
    >>> dist_mra('aluminum', 'Catalan')
    1.0
    >>> dist_mra('ATCG', 'TAGC')
    0.16666666666666663

    """
    return MRA().dist(src, tar)
241
242
243
if __name__ == '__main__':
    # When run as a script, execute the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()
247