Completed
Pull Request — master (#141)
by Chris
13:24
created

abydos.distance._indel   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 214
Duplicated Lines 0 %

Test Coverage

Coverage 94.74%

Importance

Changes 0
Metric Value
eloc 27
dl 0
loc 214
ccs 18
cts 19
cp 0.9474
rs 10
c 0
b 0
f 0
wmc 6

3 Functions

Rating   Name   Duplication   Size   Complexity  
A sim_indel() 0 31 1
A indel() 0 28 1
A dist_indel() 0 31 1

2 Methods

Rating   Name   Duplication   Size   Complexity  
A Indel.dist_abs() 0 30 1
A Indel.dist() 0 34 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._indel.
20
21
Indel distance
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._distance import _Distance
32 1
from ._levenshtein import Levenshtein
33
34 1
# Public API of this module. ``indel`` is a documented module-level function
# just like ``dist_indel`` and ``sim_indel``, so it is exported alongside them.
__all__ = ['Indel', 'dist_indel', 'indel', 'sim_indel']
35
36
37 1
class Indel(_Distance):
    """Indel (insertion/deletion) distance.

    The value is obtained from a Levenshtein computation in which only
    inserts and deletes are possible.
    """

    # A single Levenshtein instance, created when the class body runs and
    # shared by every Indel instance.
    _lev = Levenshtein()

    # NOTE(review): static analysis reported that the parameters of
    # dist_abs() and dist() differ from the methods they override in
    # _Distance -- confirm against the base class before changing them.

    def dist_abs(self, src, tar):
        """Return the indel distance between two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        int
            Indel distance

        Examples
        --------
        >>> cmp = Indel()
        >>> cmp.dist_abs('cat', 'hat')
        2
        >>> cmp.dist_abs('Niall', 'Neil')
        3
        >>> cmp.dist_abs('Colin', 'Cuilen')
        5
        >>> cmp.dist_abs('ATCG', 'TAGC')
        4

        """
        # Unit cost for the first two edit operations and 9999 for the other
        # two. Presumably the tuple is (insert, delete, substitute,
        # transpose), which would price out everything except inserts and
        # deletes -- confirm against Levenshtein.dist_abs.
        indel_costs = (1, 1, 9999, 9999)
        return self._lev.dist_abs(src, tar, mode='lev', cost=indel_costs)

    def dist(self, src, tar):
        """Return the normalized indel distance between two strings.

        The absolute indel distance is divided by the combined length of
        the two strings; identical strings yield 0.0 without any further
        computation.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized indel distance

        Examples
        --------
        >>> cmp = Indel()
        >>> round(cmp.dist('cat', 'hat'), 12)
        0.333333333333
        >>> round(cmp.dist('Niall', 'Neil'), 12)
        0.333333333333
        >>> round(cmp.dist('Colin', 'Cuilen'), 12)
        0.454545454545
        >>> cmp.dist('ATCG', 'TAGC')
        0.5

        """
        # Equal inputs (including two empty strings) return early, so the
        # division below never sees a zero combined length.
        if src == tar:
            return 0.0
        combined_length = len(src) + len(tar)
        return self.dist_abs(src, tar) / combined_length
112
113
114 1
def indel(src, tar):
    """Return the indel distance between two strings.

    This is a functional wrapper around ``Indel.dist_abs``.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    int
        Indel distance

    Examples
    --------
    >>> indel('cat', 'hat')
    2
    >>> indel('Niall', 'Neil')
    3
    >>> indel('Colin', 'Cuilen')
    5
    >>> indel('ATCG', 'TAGC')
    4

    """
    measure = Indel()
    return measure.dist_abs(src, tar)
142
143
144 1
def dist_indel(src, tar):
    """Return the normalized indel distance between two strings.

    This is a functional wrapper around ``Indel.dist``.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized indel distance

    Examples
    --------
    >>> round(dist_indel('cat', 'hat'), 12)
    0.333333333333
    >>> round(dist_indel('Niall', 'Neil'), 12)
    0.333333333333
    >>> round(dist_indel('Colin', 'Cuilen'), 12)
    0.454545454545
    >>> dist_indel('ATCG', 'TAGC')
    0.5

    """
    measure = Indel()
    return measure.dist(src, tar)
175
176
177 1
def sim_indel(src, tar):
    """Return the normalized indel similarity of two strings.

    This is a functional wrapper around the ``sim`` method of ``Indel``.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized indel similarity

    Examples
    --------
    >>> round(sim_indel('cat', 'hat'), 12)
    0.666666666667
    >>> round(sim_indel('Niall', 'Neil'), 12)
    0.666666666667
    >>> round(sim_indel('Colin', 'Cuilen'), 12)
    0.545454545455
    >>> sim_indel('ATCG', 'TAGC')
    0.5

    """
    measure = Indel()
    return measure.sim(src, tar)
208
209
210
if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this module.
    from doctest import testmod

    testmod()
214