Completed
Pull Request — master (#149)
by Chris
11:34
created

abydos.distance._lcsstr.sim_lcsstr()   A

Complexity

Conditions 1

Size

Total Lines 30
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 30
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._lcsstr.
20
21
Longest common substring
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from numpy import int as np_int
32 1
from numpy import zeros as np_zeros
33
34 1
from six.moves import range
35
36 1
from ._distance import _Distance
37
38 1
__all__ = ['LCSstr', 'dist_lcsstr', 'lcsstr', 'sim_lcsstr']
39
40
41 1
class LCSstr(_Distance):
    """Longest common substring.

    Provides :py:meth:`lcsstr`, which extracts the longest common substring
    of two strings, and :py:meth:`sim`, which normalizes that substring's
    length into a similarity score.
    """

    # NOTE(review): static analysis reported an unused ``__class__`` variable
    # for this class definition; nothing in the class body references it.

    # NOTE(review): static analysis flagged this method as not reading any
    # instance state, so it could be a function or static/class method. It is
    # kept as an instance method so existing call sites are unaffected.
    def lcsstr(self, src, tar):
        """Return the longest common substring of two strings.

        Longest common substring (LCSstr).

        Based on the code from
        https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring
        :cite:`Wikibooks:2018`.
        This is licensed Creative Commons: Attribution-ShareAlike 3.0.

        Modifications include:

            - conversion to a numpy array in place of a list of lists
            - conversion to Python 2/3-safe range from xrange via six

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        str
            The longest common substring, taken from ``src``. When several
            common substrings share the maximal length, the first one found
            while scanning ``src`` from left to right is returned. An empty
            string is returned when nothing is shared.

        Examples
        --------
        >>> sstr = LCSstr()
        >>> sstr.lcsstr('cat', 'hat')
        'at'
        >>> sstr.lcsstr('Niall', 'Neil')
        'N'
        >>> sstr.lcsstr('aluminum', 'Catalan')
        'al'
        >>> sstr.lcsstr('ATCG', 'TAGC')
        'A'

        """
        # lengths[i, j] holds the length of the common suffix of src[:i] and
        # tar[:j]; row 0 and column 0 stay zero and act as the base case.
        # The builtin int is used as the dtype: numpy.int was only an alias
        # for it and has been removed from NumPy (1.24+).
        lengths = np_zeros((len(src) + 1, len(tar) + 1), dtype=int)
        longest, i_longest = 0, 0
        for i in range(1, len(src) + 1):
            for j in range(1, len(tar) + 1):
                if src[i - 1] == tar[j - 1]:
                    lengths[i, j] = lengths[i - 1, j - 1] + 1
                    # Strict comparison keeps the earliest maximal match.
                    if lengths[i, j] > longest:
                        longest = lengths[i, j]
                        i_longest = i
                else:
                    lengths[i, j] = 0
        # i_longest is the (exclusive) end of the best match within src.
        return src[i_longest - longest : i_longest]

    # NOTE(review): static analysis reported that these parameters differ
    # from the overridden ``sim`` method of the base class, which is defined
    # outside this module -- confirm against ``_Distance.sim``.
    def sim(self, src, tar):
        r"""Return the longest common substring similarity of two strings.

        Longest common substring similarity (:math:`sim_{LCSstr}`).

        This employs the LCS function to derive a similarity metric:
        :math:`sim_{LCSstr}(s,t) = \frac{|LCSstr(s,t)|}{max(|s|, |t|)}`

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            LCSstr similarity: 1.0 for identical strings (including two
            empty strings), 0.0 when exactly one string is empty, otherwise
            the LCSstr length divided by the longer string's length.

        Examples
        --------
        >>> cmp = LCSstr()
        >>> cmp.sim('cat', 'hat')
        0.6666666666666666
        >>> cmp.sim('Niall', 'Neil')
        0.2
        >>> cmp.sim('aluminum', 'Catalan')
        0.25
        >>> cmp.sim('ATCG', 'TAGC')
        0.25

        """
        # Identical inputs short-circuit first so that two empty strings
        # score 1.0 instead of reaching the division below.
        if src == tar:
            return 1.0
        # Exactly one side is empty here, so nothing can be shared.
        if not src or not tar:
            return 0.0
        return len(self.lcsstr(src, tar)) / max(len(src), len(tar))
134
135
136 1
def lcsstr(src, tar):
    """Return the longest common substring of two strings.

    This is a wrapper for :py:meth:`LCSstr.lcsstr`; it builds a fresh
    :py:class:`LCSstr` instance on every call.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    str
        The longest common substring

    Examples
    --------
    >>> lcsstr('cat', 'hat')
    'at'
    >>> lcsstr('Niall', 'Neil')
    'N'
    >>> lcsstr('aluminum', 'Catalan')
    'al'
    >>> lcsstr('ATCG', 'TAGC')
    'A'

    """
    return LCSstr().lcsstr(src, tar)
166
167
168 1
def sim_lcsstr(src, tar):
    """Return the longest common substring similarity of two strings.

    This is a wrapper for :py:meth:`LCSstr.sim`; it builds a fresh
    :py:class:`LCSstr` instance on every call.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        LCSstr similarity

    Examples
    --------
    >>> sim_lcsstr('cat', 'hat')
    0.6666666666666666
    >>> sim_lcsstr('Niall', 'Neil')
    0.2
    >>> sim_lcsstr('aluminum', 'Catalan')
    0.25
    >>> sim_lcsstr('ATCG', 'TAGC')
    0.25

    """
    return LCSstr().sim(src, tar)
198
199
200 1
def dist_lcsstr(src, tar):
    """Return the longest common substring distance between two strings.

    This is a wrapper for :py:meth:`LCSstr.dist`; it builds a fresh
    :py:class:`LCSstr` instance on every call.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        LCSstr distance

    Examples
    --------
    >>> dist_lcsstr('cat', 'hat')
    0.33333333333333337
    >>> dist_lcsstr('Niall', 'Neil')
    0.8
    >>> dist_lcsstr('aluminum', 'Catalan')
    0.75
    >>> dist_lcsstr('ATCG', 'TAGC')
    0.75

    """
    # NOTE(review): ``dist`` is not defined on LCSstr in this module; it is
    # presumably inherited from ``_Distance`` -- confirm in the base class.
    return LCSstr().dist(src, tar)
230
231
232
if __name__ == '__main__':
    # When executed as a script, run the docstring examples as doctests.
    import doctest

    doctest.testmod()
236