@@ 2514-2545 (lines=32) @@ | ||
2511 | return 1 - sim_prefix(src, tar) |
|
2512 | ||
2513 | ||
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity

    Suffix similarity is the ratio of the length of the shorter term that
    exactly matches the longer term to the length of the shorter term,
    beginning at the end of both terms.

    Identical inputs (including two empty strings) score 1.0; if exactly one
    input is empty the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: suffix similarity
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both strings from their last character toward the front and count
    # how many positions agree before the first mismatch. zip() stops at the
    # end of the shorter string, so the count never exceeds min_len. This is
    # a single linear pass rather than re-slicing the suffix for every
    # candidate length.
    match_len = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        match_len += 1

    return match_len / min_len
|
2546 | ||
2547 | ||
2548 | def dist_suffix(src, tar): |
|
@@ 2456-2487 (lines=32) @@ | ||
2453 | return 1 - sim_length(src, tar) |
|
2454 | ||
2455 | ||
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity

    Prefix similarity is the ratio of the length of the shorter term that
    exactly matches the longer term to the length of the shorter term,
    beginning at the start of both terms.

    Identical inputs (including two empty strings) score 1.0; if exactly one
    input is empty the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: prefix similarity
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both strings from the first character and count how many
    # positions agree before the first mismatch. zip() stops at the end of
    # the shorter string, so the count never exceeds min_len. This is a
    # single linear pass rather than re-slicing the prefix for every
    # candidate length.
    match_len = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        match_len += 1

    return match_len / min_len
|
2488 | ||
2489 | ||
2490 | def dist_prefix(src, tar): |