|
@@ 189-219 (lines=31) @@
|
| 186 |
|
return 1 - sim_prefix(src, tar) |
| 187 |
|
|
| 188 |
|
|
| 189 |
|
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the ratio of the length of the shorter term that
    exactly matches the longer term to the length of the shorter term,
    beginning at the end of both terms.

    Identical strings (including two empty strings) score 1.0; if exactly
    one of the strings is empty the score is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: suffix similarity
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    # Walk both strings from their last character towards the front and
    # count matching positions. zip() stops at the end of the shorter
    # string, so the count can never exceed the shorter length. This is a
    # single linear pass rather than one slice comparison per candidate
    # suffix length.
    match_len = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        match_len += 1

    return match_len / min(len(src), len(tar))
| 220 |
|
|
| 221 |
|
|
| 222 |
|
def dist_suffix(src, tar): |
|
@@ 133-163 (lines=31) @@
|
| 130 |
|
return 1 - sim_length(src, tar) |
| 131 |
|
|
| 132 |
|
|
| 133 |
|
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the ratio of the length of the shorter term that
    exactly matches the longer term to the length of the shorter term,
    beginning at the start of both terms.

    Identical strings (including two empty strings) score 1.0; if exactly
    one of the strings is empty the score is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: prefix similarity
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    # Walk both strings from their first character and count matching
    # positions. zip() stops at the end of the shorter string, so the
    # count can never exceed the shorter length. This is a single linear
    # pass rather than one slice comparison per candidate prefix length.
    match_len = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        match_len += 1

    return match_len / min(len(src), len(tar))
| 164 |
|
|
| 165 |
|
|
| 166 |
|
def dist_prefix(src, tar): |