|
@@ 2515-2546 (lines=32) @@
|
| 2512 |
|
return 1 - sim_prefix(src, tar) |
| 2513 |
|
|
| 2514 |
|
|
| 2515 |
|
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity

    Suffix similarity is the ratio of the length of the shorter term that
    exactly matches the longer term to the length of the shorter term,
    beginning at the end of both terms.

    Two equal inputs (including two empty strings) score 1.0; if exactly one
    input is empty the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: suffix similarity
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both terms from their ends in lockstep and count matching
    # characters. This is a single O(n) pass; zip() stops at the shorter
    # term, so match_len can never exceed min_len.
    match_len = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        match_len += 1

    # float() guarantees true division even on interpreters where `/`
    # between two ints floors (Python 2 without the division future import).
    return float(match_len) / min_len
| 2547 |
|
|
| 2548 |
|
|
| 2549 |
|
def dist_suffix(src, tar): |
|
@@ 2457-2488 (lines=32) @@
|
| 2454 |
|
return 1 - sim_length(src, tar) |
| 2455 |
|
|
| 2456 |
|
|
| 2457 |
|
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity

    Prefix similarity is the ratio of the length of the shorter term that
    exactly matches the longer term to the length of the shorter term,
    beginning at the start of both terms.

    Two equal inputs (including two empty strings) score 1.0; if exactly one
    input is empty the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: prefix similarity
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both terms from their starts in lockstep and count matching
    # characters. This is a single O(n) pass; zip() stops at the shorter
    # term, so match_len can never exceed min_len.
    match_len = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        match_len += 1

    # float() guarantees true division even on interpreters where `/`
    # between two ints floors (Python 2 without the division future import).
    return float(match_len) / min_len
| 2489 |
|
|
| 2490 |
|
|
| 2491 |
|
def dist_prefix(src, tar): |