abydos.distance._fuzzywuzzy_partial_string   A
last analyzed

Complexity

Total Complexity 4

Size/Duplication

Total Lines 95
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 4
eloc 22
dl 0
loc 95
ccs 14
cts 14
cp 1
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A FuzzyWuzzyPartialString.sim() 0 49 4
1
# Copyright 2019-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.distance._fuzzywuzzy_partial_string.
18
19 1
FuzzyWuzzy Partial String similarity
20
"""
21
22
from difflib import SequenceMatcher
23
24 1
from ._distance import _Distance
25
26
# Names exported by ``from <this module> import *``: only the similarity class.
__all__ = ['FuzzyWuzzyPartialString']
27
28
29
class FuzzyWuzzyPartialString(_Distance):
    """FuzzyWuzzy Partial String similarity.

    An implementation of the FuzzyWuzzy Partial String similarity algorithm
    :cite:`Cohen:2011`. The blog post describing the algorithm reports an
    integer score in the range [0, 100]; this class instead reports a float
    in the range [0.0, 1.0].

    .. versionadded:: 0.4.0
    """

    def sim(self, src: str, tar: str) -> float:
        """Return the FuzzyWuzzy Partial String similarity of two strings.

        The shorter of the two strings is compared, via
        :class:`difflib.SequenceMatcher`, against every equally long
        substring of the longer string. The best ratio found is returned,
        and the scan stops as soon as a ratio of 1.0 is reached.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            FuzzyWuzzy Partial String similarity

        Examples
        --------
        >>> cmp = FuzzyWuzzyPartialString()
        >>> round(cmp.sim('cat', 'hat'), 12)
        0.666666666667
        >>> round(cmp.sim('Niall', 'Neil'), 12)
        0.75
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
        0.428571428571
        >>> cmp.sim('ATCG', 'TAGC')
        0.5


        .. versionadded:: 0.4.0

        """
        # The shorter string is the pattern; on equal lengths src stays
        # the pattern, exactly as a conditional swap would leave it.
        if len(src) > len(tar):
            needle, haystack = tar, src
        else:
            needle, haystack = src, tar

        width = len(needle)
        best = 0.0

        # One window per possible alignment of the pattern in the longer
        # string (a single empty-vs-empty window when both are empty).
        for offset in range(len(haystack) - width + 1):
            if best >= 1.0:
                # A perfect window cannot be beaten; stop scanning.
                break
            window = haystack[offset : offset + width]
            score = SequenceMatcher(None, needle, window).ratio()
            if score > best:
                best = score

        return best
89 1
90
91
if __name__ == '__main__':
    # When executed as a script, run the docstring examples of this module.
    from doctest import testmod

    testmod()
95