@@ 2520-2551 (lines=32) @@ | ||
2517 | return 1 - sim_prefix(src, tar) |
|
2518 | ||
2519 | ||
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the length of the longest common suffix of the two
    terms divided by the length of the shorter term.

    Equal strings (including two empty strings) score 1.0. If exactly one of
    the strings is empty, the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: suffix similarity
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    # An empty term shares no suffix with a non-empty one; returning early
    # also keeps the division below safe from a zero denominator.
    if not src or not tar:
        return 0.0

    # Walk both strings backwards in lockstep. zip() stops at the end of the
    # shorter string, so this is a single linear pass instead of re-slicing
    # and re-comparing every candidate suffix length.
    matched = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        matched += 1
    return matched / min(len(src), len(tar))
|
2552 | ||
2553 | ||
2554 | def dist_suffix(src, tar): |
|
@@ 2462-2493 (lines=32) @@ | ||
2459 | return 1 - sim_length(src, tar) |
|
2460 | ||
2461 | ||
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the length of the longest common prefix of the two
    terms divided by the length of the shorter term.

    Equal strings (including two empty strings) score 1.0. If exactly one of
    the strings is empty, the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: prefix similarity
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    # An empty term shares no prefix with a non-empty one; returning early
    # also keeps the division below safe from a zero denominator.
    if not src or not tar:
        return 0.0

    # Walk both strings forwards in lockstep. zip() stops at the end of the
    # shorter string, so this is a single linear pass instead of re-slicing
    # and re-comparing every candidate prefix length.
    matched = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        matched += 1
    return matched / min(len(src), len(tar))
|
2494 | ||
2495 | ||
2496 | def dist_prefix(src, tar): |