Completed
Pull Request — master (#225)
by Chris
09:15
created

abydos.distance._lig3   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 104
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 2
eloc 21
dl 0
loc 104
ccs 13
cts 13
cp 1
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A LIG3.sim() 0 40 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._lig3.
20
21
LIG3 similarity
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._distance import _Distance
32 1
from ._levenshtein import Levenshtein
33
34 1
# Names exported by ``from <this module> import *``.
__all__ = ['LIG3']
35
36
37 1
class LIG3(_Distance):
    r"""LIG3 similarity.

    One of the three Levenshtein-ISG-Guth hybrid similarity measures
    proposed in :cite:`Snae:2002` (LIG1, LIG2, and LIG3). LIG1 coincides
    with ISG and LIG2 coincides with normalized Levenshtein similarity, so
    LIG3 is the only novel measure of the three. It is defined as:

        .. math::

            sim_{LIG3}(X, Y) = \frac{2I}{2I+C}

    where I is the number of positions at which both words hold the same
    character (only positions up to the length of the shorter word are
    compared), and C is the Levenshtein distance between the two words.

    .. versionadded:: 0.4.1
    """

    # Levenshtein instance shared by all LIG3 objects; supplies the C term.
    _lev = Levenshtein()

    def sim(self, src, tar):
        """Return the LIG3 similarity of two words.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            The LIG3 similarity; 1.0 when the two strings are equal

        Examples
        --------
        >>> cmp = LIG3()
        >>> cmp.sim('cat', 'hat')
        0.8
        >>> cmp.sim('Niall', 'Neil')
        0.5714285714285714
        >>> cmp.sim('aluminum', 'Catalan')
        0.0
        >>> cmp.sim('ATCG', 'TAGC')
        0.0


        .. versionadded:: 0.4.1

        """
        # Equal inputs short-circuit; this also keeps the denominator
        # below from being zero, since unequal strings have cost >= 1.
        if src == tar:
            return 1.0

        # 2I: twice the number of aligned positions holding the same
        # character. zip() stops at the end of the shorter word.
        doubled_hits = 2 * sum(
            1 for src_char, tar_char in zip(src, tar) if src_char == tar_char
        )
        # C: absolute Levenshtein distance between the two words.
        edit_cost = self._lev.dist_abs(src, tar)

        return doubled_hits / (doubled_hits + edit_cost)
98
99
100
if __name__ == '__main__':
    # Executed as a script: run the examples embedded in the docstrings.
    from doctest import testmod

    testmod()
104