Completed
Push — master ( 3ac297...afe14d )
by Chris
16:40 queued 07:25
created

abydos.distance._monge_elkan.MongeElkan.sim()   B

Complexity

Conditions 7

Size

Total Lines 53
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 17
nop 5
dl 0
loc 53
ccs 17
cts 17
cp 1
crap 7
rs 8
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._monge_elkan.
20
21
Monge-Elkan similarity & distance
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._distance import _Distance
32 1
from ._levenshtein import sim_levenshtein
33 1
from ..tokenizer import QGrams
34
35 1
__all__ = ['MongeElkan', 'dist_monge_elkan', 'sim_monge_elkan']
36
37
38 1
class MongeElkan(_Distance):
    """Monge-Elkan similarity.

    The measure follows the definition given in :cite:`Monge:1996`.

    Note: Monge-Elkan is NOT a symmetric similarity algorithm, so the
    similarity of src to tar may differ from the similarity of tar to src.
    When the symmetric argument is True, a symmetric value is produced by
    computing both :math:`sim_{Monge-Elkan}(src, tar)` and
    :math:`sim_{Monge-Elkan}(tar, src)` and averaging them, which doubles
    the computation time.
    """

    def sim(self, src, tar, sim_func=sim_levenshtein, symmetric=False):
        """Return the Monge-Elkan similarity of two strings.

        Each q-gram of src is scored against every q-gram of tar with
        sim_func, the best score per source q-gram is kept, and the mean of
        those best scores is returned.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison
        sim_func : function
            The internal similarity metric applied to pairs of q-grams
        symmetric : bool
            If True, average the src-to-tar and tar-to-src similarities

        Returns
        -------
        float
            Monge-Elkan similarity

        Examples
        --------
        >>> cmp = MongeElkan()
        >>> cmp.sim('cat', 'hat')
        0.75
        >>> round(cmp.sim('Niall', 'Neil'), 12)
        0.666666666667
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
        0.388888888889
        >>> cmp.sim('ATCG', 'TAGC')
        0.5

        """
        # Identical inputs are maximally similar; skip tokenization.
        if src == tar:
            return 1.0

        src_grams = sorted(QGrams(src).elements())
        tar_grams = sorted(QGrams(tar).elements())

        # With no q-grams on either side there is nothing to compare.
        if not (src_grams and tar_grams):
            return 0.0

        # Best match per source q-gram. The maximum is seeded with -inf and
        # folded left to right over the target q-grams.
        floor = float('-inf')
        best_matches = [
            max([floor] + [sim_func(s_gram, t_gram) for t_gram in tar_grams])
            for s_gram in src_grams
        ]
        score = sum(best_matches) / len(src_grams)

        if not symmetric:
            return score

        # The reverse direction is computed asymmetrically (symmetric=False)
        # so the recursion stops after a single extra pass.
        reverse_score = self.sim(tar, src, sim_func, False)
        return (score + reverse_score) / 2
104
105
106 1
def sim_monge_elkan(src, tar, sim_func=sim_levenshtein, symmetric=False):
    """Return the Monge-Elkan similarity of two strings.

    This is a wrapper for :py:meth:`MongeElkan.sim`; all arguments are
    forwarded unchanged to a freshly constructed :py:class:`MongeElkan`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison
    sim_func : function
        The internal similarity metric to employ
    symmetric : bool
        Return a symmetric similarity measure

    Returns
    -------
    float
        Monge-Elkan similarity

    Examples
    --------
    >>> sim_monge_elkan('cat', 'hat')
    0.75
    >>> round(sim_monge_elkan('Niall', 'Neil'), 12)
    0.666666666667
    >>> round(sim_monge_elkan('aluminum', 'Catalan'), 12)
    0.388888888889
    >>> sim_monge_elkan('ATCG', 'TAGC')
    0.5

    """
    measure = MongeElkan()
    return measure.sim(src, tar, sim_func, symmetric)
140
141
142 1
def dist_monge_elkan(src, tar, sim_func=sim_levenshtein, symmetric=False):
    """Return the Monge-Elkan distance between two strings.

    This is a wrapper for :py:meth:`MongeElkan.dist`; all arguments are
    forwarded unchanged to a freshly constructed :py:class:`MongeElkan`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison
    sim_func : function
        The internal similarity metric to employ
    symmetric : bool
        Return a symmetric similarity measure

    Returns
    -------
    float
        Monge-Elkan distance

    Examples
    --------
    >>> dist_monge_elkan('cat', 'hat')
    0.25
    >>> round(dist_monge_elkan('Niall', 'Neil'), 12)
    0.333333333333
    >>> round(dist_monge_elkan('aluminum', 'Catalan'), 12)
    0.611111111111
    >>> dist_monge_elkan('ATCG', 'TAGC')
    0.5

    """
    measure = MongeElkan()
    return measure.dist(src, tar, sim_func, symmetric)
176
177
178
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()
182