@@ 2489-2520 (lines=32) @@ | ||
2486 | return 1 - sim_prefix(src, tar) |
|
2487 | ||
2488 | ||
def sim_suffix(src, tar):
    """Return the suffix similarity of two strings.

    Suffix similarity is the length of the longest common suffix of the two
    terms divided by the length of the shorter term.

    Identical inputs (including two empty strings) score 1.0; if exactly one
    of the inputs is empty the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: suffix similarity
    :rtype: float

    >>> sim_suffix('cat', 'hat')
    0.6666666666666666
    >>> sim_suffix('Niall', 'Neil')
    0.25
    >>> sim_suffix('aluminum', 'Catalan')
    0.0
    >>> sim_suffix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both terms backwards in lockstep and count matching characters.
    # zip() stops at the end of the shorter term, so the count can never
    # exceed min_len.  This is a single linear pass instead of re-slicing
    # and comparing both terms for every candidate suffix length.
    common = 0
    for src_char, tar_char in zip(reversed(src), reversed(tar)):
        if src_char != tar_char:
            break
        common += 1

    return common / min_len
|
2519 | ||
2520 | ||
2521 | def dist_suffix(src, tar): |
|
2522 | """Return the suffix distance between two strings. |
|
2523 | ||
@@ 2435-2466 (lines=32) @@ | ||
2432 | return 1 - sim_length(src, tar) |
|
2433 | ||
2434 | ||
def sim_prefix(src, tar):
    """Return the prefix similarity of two strings.

    Prefix similarity is the length of the longest common prefix of the two
    terms divided by the length of the shorter term.

    Identical inputs (including two empty strings) score 1.0; if exactly one
    of the inputs is empty the score is 0.0.

    :param str src, tar: two strings to be compared
    :returns: prefix similarity
    :rtype: float

    >>> sim_prefix('cat', 'hat')
    0.0
    >>> sim_prefix('Niall', 'Neil')
    0.25
    >>> sim_prefix('aluminum', 'Catalan')
    0.0
    >>> sim_prefix('ATCG', 'TAGC')
    0.0
    """
    if src == tar:
        return 1.0
    if not src or not tar:
        return 0.0

    min_len = min(len(src), len(tar))

    # Walk both terms forwards in lockstep and count matching characters.
    # zip() stops at the end of the shorter term, so the count can never
    # exceed min_len.  This is a single linear pass instead of re-slicing
    # and comparing both terms for every candidate prefix length.
    common = 0
    for src_char, tar_char in zip(src, tar):
        if src_char != tar_char:
            break
        common += 1

    return common / min_len
|
2465 | ||
2466 | ||
2467 | def dist_prefix(src, tar): |
|
2468 | """Return the prefix distance between two strings. |
|
2469 |