@@ 2614-2645 (lines=32) @@ | ||
2611 | return 1 - sim_prefix(src, tar) |
|
2612 | ||
2613 | ||
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the length of the longest common suffix of the two
    terms divided by the length of the shorter term.

    Identical inputs (including two empty strings) score 1.0; if exactly one
    input is empty the score is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: suffix similarity, in the range [0.0, 1.0]
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    # Checked first so that two empty strings count as a perfect match.
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    # Walk both strings from their last character in a single pass; zip()
    # stops at the end of the shorter one, so at most min_len pairs are
    # compared (instead of re-slicing both strings for every candidate
    # suffix length).
    matched = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        matched += 1

    min_len = min(len(src), len(tar))
    # float() keeps this a true division even where "/" on two ints would
    # floor (Python 2 without `from __future__ import division`).
    return float(matched) / min_len
|
2645 | ||
2646 | ||
2647 | def dist_suffix(src, tar): |
|
2648 | """Return the suffix distance between two strings. |
|
@@ 2558-2589 (lines=32) @@ | ||
2555 | return 1 - sim_length(src, tar) |
|
2556 | ||
2557 | ||
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the length of the longest common prefix of the two
    terms divided by the length of the shorter term.

    Identical inputs (including two empty strings) score 1.0; if exactly one
    input is empty the score is 0.0.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: prefix similarity, in the range [0.0, 1.0]
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    # Checked first so that two empty strings count as a perfect match.
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    # Walk both strings from their first character in a single pass; zip()
    # stops at the end of the shorter one, so at most min_len pairs are
    # compared (instead of re-slicing both strings for every candidate
    # prefix length).
    matched = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        matched += 1

    min_len = min(len(src), len(tar))
    # float() keeps this a true division even where "/" on two ints would
    # floor (Python 2 without `from __future__ import division`).
    return float(matched) / min_len
|
2589 | ||
2590 | ||
2591 | def dist_prefix(src, tar): |
|
2592 | """Return the prefix distance between two strings. |