Completed
Branch master (78a222)
by Chris
14:36
created

abydos.distance._basic.sim_length()   A

Complexity

Conditions 5

Size

Total Lines 25
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 25
ccs 6
cts 6
cp 1
rs 9.3333
c 0
b 0
f 0
cc 5
nop 2
crap 5
1
# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance.basic.

The distance.basic module implements simple string edit distance functions
including:

    - Identity similarity & distance
    - Length similarity & distance
    - Prefix similarity & distance
    - Suffix similarity & distance
"""
29
30 1
from __future__ import division, unicode_literals
31
32 1
from six.moves import range
33
34 1
# Public API of this module: every measure is exported both as a
# similarity (sim_*) and as its complementary distance (dist_*).
__all__ = [
    'dist_ident',
    'dist_length',
    'dist_prefix',
    'dist_suffix',
    'sim_ident',
    'sim_length',
    'sim_prefix',
    'sim_suffix',
]
44
45
46 1
def sim_ident(src, tar):
    """Return the identity similarity of two strings.

    Identity similarity is 1 if the two strings compare equal, otherwise 0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: identity similarity (1 when ``src == tar``, else 0)
    :rtype: int

    >>> sim_ident('cat', 'hat')
    0
    >>> sim_ident('cat', 'cat')
    1
    """
    # int() turns the boolean comparison result into the documented 0/1.
    return int(src == tar)
62
63
64 1
def dist_ident(src, tar):
    """Return the identity distance between two strings.

    This is 0 if the two strings are identical, otherwise 1, i.e.
    :math:`dist_{identity} = 1 - sim_{identity}`.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: identity distance
    :rtype: int

    >>> dist_ident('cat', 'hat')
    1
    >>> dist_ident('cat', 'cat')
    0
    """
    # The distance is defined purely as the complement of the similarity.
    return 1 - sim_ident(src, tar)
81
82
83 1
def sim_length(src, tar):
    """Return the length similarity of two strings.

    Length similarity is the ratio of the length of the shorter string to
    the length of the longer string. Two equal strings (including two empty
    strings) have similarity 1.0; if exactly one string is empty, the
    similarity is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: length similarity
    :rtype: float

    >>> sim_length('cat', 'hat')
    1.0
    >>> sim_length('Niall', 'Neil')
    0.8
    >>> sim_length('aluminum', 'Catalan')
    0.875
    >>> sim_length('ATCG', 'TAGC')
    1.0
    """
    # Equal inputs short-circuit first so that two empty strings yield 1.0
    # instead of falling into the empty-string case below.
    if src == tar:
        return 1.0
    # Exactly one side is empty here; this also guards the division below
    # against a zero denominator.
    if not src or not tar:
        return 0.0
    src_len, tar_len = len(src), len(tar)
    return min(src_len, tar_len) / max(src_len, tar_len)
108
109
110 1
def dist_length(src, tar):
    """Return the length distance between two strings.

    Length distance is the complement of length similarity:
    :math:`dist_{length} = 1 - sim_{length}`.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: length distance
    :rtype: float

    >>> dist_length('cat', 'hat')
    0.0
    >>> dist_length('Niall', 'Neil')
    0.19999999999999996
    >>> dist_length('aluminum', 'Catalan')
    0.125
    >>> dist_length('ATCG', 'TAGC')
    0.0
    """
    # The distance is defined purely as the complement of the similarity.
    return 1 - sim_length(src, tar)
131
132
133 1 View Code Duplication
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the length of the longest common prefix of the two
    strings divided by the length of the shorter string. Two equal strings
    (including two empty strings) have similarity 1.0; if exactly one string
    is empty, the similarity is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: prefix similarity
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    # Exactly one side is empty here; this also guards the division below
    # against a zero denominator.
    if not src or not tar:
        return 0.0

    # Count matching leading characters in one pass. zip() stops at the end
    # of the shorter string, so the count can never exceed its length.
    matched = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        matched += 1
    return matched / min(len(src), len(tar))
164
165
166 1
def dist_prefix(src, tar):
    """Return the prefix distance between two strings.

    Prefix distance is the complement of prefix similarity:
    :math:`dist_{prefix} = 1 - sim_{prefix}`.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: prefix distance
    :rtype: float

    >>> dist_prefix('cat', 'hat')
    1.0
    >>> dist_prefix('Niall', 'Neil')
    0.75
    >>> dist_prefix('aluminum', 'Catalan')
    1.0
    >>> dist_prefix('ATCG', 'TAGC')
    1.0
    """
    # The distance is defined purely as the complement of the similarity.
    return 1 - sim_prefix(src, tar)
187
188
189 1 View Code Duplication
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the length of the longest common suffix of the two
    strings divided by the length of the shorter string. Two equal strings
    (including two empty strings) have similarity 1.0; if exactly one string
    is empty, the similarity is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: suffix similarity
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    # Exactly one side is empty here; this also guards the division below
    # against a zero denominator.
    if not src or not tar:
        return 0.0

    # Count matching trailing characters in one pass, walking both strings
    # from the end. zip() stops at the end of the shorter string, so the
    # count can never exceed its length.
    matched = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        matched += 1
    return matched / min(len(src), len(tar))
220
221
222 1
def dist_suffix(src, tar):
    """Return the suffix distance between two strings.

    Suffix distance is the complement of suffix similarity:
    :math:`dist_{suffix} = 1 - sim_{suffix}`.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: suffix distance
    :rtype: float

    >>> dist_suffix('cat', 'hat')
    0.33333333333333337
    >>> dist_suffix('Niall', 'Neil')
    0.75
    >>> dist_suffix('aluminum', 'Catalan')
    1.0
    >>> dist_suffix('ATCG', 'TAGC')
    1.0
    """
    # The distance is defined purely as the complement of the similarity.
    return 1 - sim_suffix(src, tar)
243
244
245
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()
249