@@ 38-76 (lines=39) @@ | ||
35 | __all__ = ['Prefix', 'dist_prefix', 'sim_prefix'] |
|
36 | ||
37 | ||
38 | class Prefix(_Distance): |
|
39 | """Prefix similarity and distance.""" |
|
40 | ||
41 | def sim(self, src, tar): |
|
42 | """Return the prefix similarity of two strings. |
|
43 | ||
44 | Prefix similarity is the ratio of the length of the shorter term that |
|
45 | exactly matches the longer term to the length of the shorter term, |
|
46 | beginning at the start of both terms. |
|
47 | ||
48 | Parameters |
|
49 | ---------- |
|
50 | src : str |
|
51 | Source string for comparison |
|
52 | tar : str |
|
53 | Target string for comparison |
|
54 | ||
55 | Returns |
|
56 | ------- |
|
57 | float |
|
58 | Prefix similarity |
|
59 | ||
60 | Examples |
|
61 | -------- |
|
62 | >>> cmp = Prefix() |
|
63 | >>> cmp.sim('cat', 'hat') |
|
64 | 0.0 |
|
65 | >>> cmp.sim('Niall', 'Neil') |
|
66 | 0.25 |
|
67 | >>> cmp.sim('aluminum', 'Catalan') |
|
68 | 0.0 |
|
69 | >>> cmp.sim('ATCG', 'TAGC') |
|
70 | 0.0 |
|
71 | ||
72 | """ |
|
73 | if src == tar: |
|
74 | return 1.0 |
|
75 | if not src or not tar: |
|
76 | return 0.0 |
|
77 | min_word, max_word = (src, tar) if len(src) < len(tar) else (tar, src) |
|
78 | min_len = len(min_word) |
|
79 | for i in range(min_len, 0, -1): |
@@ 38-76 (lines=39) @@ | ||
35 | __all__ = ['Suffix', 'dist_suffix', 'sim_suffix'] |
|
36 | ||
37 | ||
38 | class Suffix(_Distance): |
|
39 | """Suffix similarity and distance.""" |
|
40 | ||
41 | def sim(self, src, tar): |
|
42 | """Return the suffix similarity of two strings. |
|
43 | ||
44 | Suffix similarity is the ratio of the length of the shorter term that |
|
45 | exactly matches the longer term to the length of the shorter term, |
|
46 | beginning at the end of both terms. |
|
47 | ||
48 | Parameters |
|
49 | ---------- |
|
50 | src : str |
|
51 | Source string for comparison |
|
52 | tar : str |
|
53 | Target string for comparison |
|
54 | ||
55 | Returns |
|
56 | ------- |
|
57 | float |
|
58 | Suffix similarity |
|
59 | ||
60 | Examples |
|
61 | -------- |
|
62 | >>> cmp = Suffix() |
|
63 | >>> cmp.sim('cat', 'hat') |
|
64 | 0.6666666666666666 |
|
65 | >>> cmp.sim('Niall', 'Neil') |
|
66 | 0.25 |
|
67 | >>> cmp.sim('aluminum', 'Catalan') |
|
68 | 0.0 |
|
69 | >>> cmp.sim('ATCG', 'TAGC') |
|
70 | 0.0 |
|
71 | ||
72 | """ |
|
73 | if src == tar: |
|
74 | return 1.0 |
|
75 | if not src or not tar: |
|
76 | return 0.0 |
|
77 | min_word, max_word = (src, tar) if len(src) < len(tar) else (tar, src) |
|
78 | min_len = len(min_word) |
|
79 | for i in range(min_len, 0, -1): |