Completed
Pull Request — master (#141)
by Chris
11:04
created

abydos.distance._monge_elkan.MongeElkan.sim()   B

Complexity

Conditions 7

Size

Total Lines 45
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 17
nop 5
dl 0
loc 45
ccs 17
cts 17
cp 1
crap 7
rs 8
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._monge_elkan.
20
21
Monge-Elkan similarity & distance
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._distance import _Distance
32 1
from ._levenshtein import sim_levenshtein
33 1
from ..tokenizer import QGrams
34
35 1
__all__ = ['MongeElkan', 'dist_monge_elkan', 'sim_monge_elkan']
36
37
38 1
class MongeElkan(_Distance):
0 ignored issues
show
Unused Code introduced by
The variable __class__ seems to be unused.
Loading history...
39
    """Monge-Elkan similarity.
40
41
    Monge-Elkan is defined in :cite:`Monge:1996`.
42
43
    Note: Monge-Elkan is NOT a symmetric similarity algorithm. Thus, the
44
    similarity of src to tar is not necessarily equal to the similarity of
45
    tar to src. If the symmetric argument is True, a symmetric value is
46
    calculated, at the cost of doubling the computation time (since
47
    :math:`sim_{Monge-Elkan}(src, tar)` and :math:`sim_{Monge-Elkan}(tar, src)`
48
    are both calculated and then averaged).
49
    """
50
51 1
    def sim(self, src, tar, sim_func=sim_levenshtein, symmetric=False):
0 ignored issues
show
Bug introduced by
Parameters differ from overridden 'sim' method
Loading history...
52
        """Return the Monge-Elkan similarity of two strings.
53
54
        Args:
55
            src (str): Source string for comparison
56
            tar (str): Target string for comparison
57
            sim_func (function): the internal similarity metric to employ
58
            symmetric (bool): return a symmetric similarity measure
59
60
        Returns:
61
            float: Monge-Elkan similarity
62
63
        Examples:
64
            >>> cmp = MongeElkan()
65
            >>> cmp.sim('cat', 'hat')
66
            0.75
67
            >>> round(cmp.sim('Niall', 'Neil'), 12)
68
            0.666666666667
69
            >>> round(cmp.sim('aluminum', 'Catalan'), 12)
70
            0.388888888889
71
            >>> cmp.sim('ATCG', 'TAGC')
72
            0.5
73
74
        """
75 1
        if src == tar:
76 1
            return 1.0
77
78 1
        q_src = sorted(QGrams(src).elements())
79 1
        q_tar = sorted(QGrams(tar).elements())
80
81 1
        if not q_src or not q_tar:
82 1
            return 0.0
83
84 1
        sum_of_maxes = 0
85 1
        for q_s in q_src:
86 1
            max_sim = float('-inf')
87 1
            for q_t in q_tar:
88 1
                max_sim = max(max_sim, sim_func(q_s, q_t))
89 1
            sum_of_maxes += max_sim
90 1
        sim_em = sum_of_maxes / len(q_src)
91
92 1
        if symmetric:
93 1
            sim_em = (sim_em + self.sim(tar, src, sim_func, False)) / 2
94
95 1
        return sim_em
96
97
98 1
def sim_monge_elkan(src, tar, sim_func=sim_levenshtein, symmetric=False):
99
    """Return the Monge-Elkan similarity of two strings.
100
101
    This is a wrapper for :py:meth:`MongeElkan.sim`.
102
103
    Args:
104
        src (str): Source string for comparison
105
        tar (str): Target string for comparison
106
        sim_func (function): the internal similarity metric to employ
107
        symmetric (bool): return a symmetric similarity measure
108
109
    Returns:
110
        float: Monge-Elkan similarity
111
112
    Examples:
113
        >>> sim_monge_elkan('cat', 'hat')
114
        0.75
115
        >>> round(sim_monge_elkan('Niall', 'Neil'), 12)
116
        0.666666666667
117
        >>> round(sim_monge_elkan('aluminum', 'Catalan'), 12)
118
        0.388888888889
119
        >>> sim_monge_elkan('ATCG', 'TAGC')
120
        0.5
121
122
    """
123 1
    return MongeElkan().sim(src, tar, sim_func, symmetric)
124
125
126 1
def dist_monge_elkan(src, tar, sim_func=sim_levenshtein, symmetric=False):
127
    """Return the Monge-Elkan distance between two strings.
128
129
    This is a wrapper for :py:meth:`MongeElkan.dist`.
130
131
    Args:
132
        src (str): Source string for comparison
133
        tar (str): Target string for comparison
134
        sim_func (function): the internal similarity metric to employ
135
        symmetric (bool): return a symmetric similarity measure
136
137
    Returns:
138
        float: Monge-Elkan distance
139
140
    Examples:
141
        >>> dist_monge_elkan('cat', 'hat')
142
        0.25
143
        >>> round(dist_monge_elkan('Niall', 'Neil'), 12)
144
        0.333333333333
145
        >>> round(dist_monge_elkan('aluminum', 'Catalan'), 12)
146
        0.611111111111
147
        >>> dist_monge_elkan('ATCG', 'TAGC')
148
        0.5
149
150
    """
151 1
    return MongeElkan().dist(src, tar, sim_func, symmetric)
152
153
154
if __name__ == '__main__':
155
    import doctest
156
157
    doctest.testmod()
158