|
@@ 2488-2519 (lines=32) @@
|
| 2485 |
|
return 1 - sim_prefix(src, tar) |
| 2486 |
|
|
| 2487 |
|
|
| 2488 |
|
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the length of the longest common suffix of the two
    terms divided by the length of the shorter term.

    :param str src, tar: two strings to be compared
    :returns: suffix similarity, between 0.0 (no shared suffix, or exactly
        one term is empty) and 1.0 (the terms are equal, or the shorter term
        is a suffix of the longer one)
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both terms backwards in lockstep and stop at the first mismatch.
    # zip() ends with the shorter term, so the count never exceeds min_len.
    # This is a single linear pass instead of re-slicing and re-comparing
    # both terms for every candidate suffix length.
    matched = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        matched += 1

    return matched / min_len
| 2518 |
|
|
| 2519 |
|
|
| 2520 |
|
def dist_suffix(src, tar): |
| 2521 |
|
"""Return the suffix distance between two strings. |
| 2522 |
|
|
|
@@ 2434-2465 (lines=32) @@
|
| 2431 |
|
return 1 - sim_length(src, tar) |
| 2432 |
|
|
| 2433 |
|
|
| 2434 |
|
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the length of the longest common prefix of the two
    terms divided by the length of the shorter term.

    :param str src, tar: two strings to be compared
    :returns: prefix similarity, between 0.0 (no shared prefix, or exactly
        one term is empty) and 1.0 (the terms are equal, or the shorter term
        is a prefix of the longer one)
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both terms forwards in lockstep and stop at the first mismatch.
    # zip() ends with the shorter term, so the count never exceeds min_len.
    # This is a single linear pass instead of re-slicing and re-comparing
    # both terms for every candidate prefix length.
    matched = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        matched += 1

    return matched / min_len
| 2464 |
|
|
| 2465 |
|
|
| 2466 |
|
def dist_prefix(src, tar): |
| 2467 |
|
"""Return the prefix distance between two strings. |
| 2468 |
|
|