@@ 2513-2544 (lines=32) @@ | ||
2510 | return 1 - sim_prefix(src, tar) |
|
2511 | ||
2512 | ||
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the length of the longest common suffix of the
    two terms divided by the length of the shorter term.

    :param str src, tar: two strings to be compared
    :returns: suffix similarity, from 0.0 (no shared suffix) to 1.0
        (the shorter term is entirely a suffix of the longer one)
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    # Identical terms (including two empty strings) are a perfect match.
    if src == tar:
        return 1.0
    # Exactly one empty term: nothing can be shared.
    if not src or not tar:
        return 0.0

    # Walk both terms from the end in lockstep. zip() stops at the end of
    # the shorter term, so a single linear pass finds the longest common
    # suffix without building a pair of slices per candidate length.
    shared = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        shared += 1

    if not shared:
        return 0.0
    return shared / min(len(src), len(tar))
|
2543 | ||
2544 | ||
2545 | def dist_suffix(src, tar): |
|
2546 | """Return the suffix distance between two strings. |
|
2547 | ||
@@ 2459-2490 (lines=32) @@ | ||
2456 | return 1 - sim_length(src, tar) |
|
2457 | ||
2458 | ||
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the length of the longest common prefix of the
    two terms divided by the length of the shorter term.

    :param str src, tar: two strings to be compared
    :returns: prefix similarity, from 0.0 (no shared prefix) to 1.0
        (the shorter term is entirely a prefix of the longer one)
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    # Identical terms (including two empty strings) are a perfect match.
    if src == tar:
        return 1.0
    # Exactly one empty term: nothing can be shared.
    if not src or not tar:
        return 0.0

    # Walk both terms from the start in lockstep. zip() stops at the end
    # of the shorter term, so a single linear pass finds the longest common
    # prefix without building a pair of slices per candidate length.
    shared = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        shared += 1

    if not shared:
        return 0.0
    return shared / min(len(src), len(tar))
|
2489 | ||
2490 | ||
2491 | def dist_prefix(src, tar): |
|
2492 | """Return the prefix distance between two strings. |
|
2493 |