Conditions | 14 |
Total Lines | 219 |
Code Lines | 174 |
Lines | 0 |
Ratio | 0 % |
Tests | 44 |
CRAP Score | 14 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
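Commonly applied refactorings include Extract Method.
If many parameters or temporary variables are present, also consider Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.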
Complex classes like abydos.phonetic._alpha_sis.alpha_sis() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
# Letters that survive input clean-up; everything else is discarded.
_ALPHA_SIS_LETTERS = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Codes for word-initial substrings, as ordered (prefix, code) pairs.
# Two-letter prefixes are listed first so they win over single letters.
_ALPHA_SIS_INITIALS = (
    ('GF', '08'),
    ('GM', '03'),
    ('GN', '02'),
    ('KN', '02'),
    ('PF', '08'),
    ('PN', '02'),
    ('PS', '00'),
    ('WR', '04'),
    ('A', '1'),
    ('E', '1'),
    ('H', '2'),
    ('I', '1'),
    ('J', '3'),
    ('O', '1'),
    ('U', '1'),
    ('W', '4'),
    ('Y', '5'),
)

# Codes for the rest of the word, as ordered (prefix, alternatives) pairs.
# Longer prefixes come before the shorter ones they contain.  Every entry
# holds a tuple of alternatives; more than one alternative forks each code
# built so far.
_ALPHA_SIS_BASIC = (
    ('SCH', ('6',)),
    ('CZ', ('70', '6', '0')),
    ('CH', ('6', '70', '0')),
    ('CK', ('7', '6')),
    ('DS', ('0', '10')),
    ('DZ', ('0', '10')),
    ('TS', ('0', '10')),
    ('TZ', ('0', '10')),
    ('CI', ('0',)),
    ('CY', ('0',)),
    ('CE', ('0',)),
    ('SH', ('6',)),
    ('DG', ('7',)),
    ('PH', ('8',)),
    ('C', ('7', '6')),
    ('K', ('7', '6')),
    ('Z', ('0',)),
    ('S', ('0',)),
    ('D', ('1',)),
    ('T', ('1',)),
    ('N', ('2',)),
    ('M', ('3',)),
    ('R', ('4',)),
    ('L', ('5',)),
    ('J', ('6',)),
    ('G', ('7',)),
    ('Q', ('7',)),
    ('X', ('7',)),
    ('F', ('8',)),
    ('V', ('8',)),
    ('B', ('9',)),
    ('P', ('9',)),
)


def alpha_sis(word, max_length=14):
    """Return the IBM Alpha Search Inquiry System code for a word.

    The Alpha Search Inquiry System code is defined in :cite:`IBM:1973`.
    This implementation is based on the description in :cite:`Moore:1977`.

    A collection is necessary since there can be multiple values for a
    single word. But the collection must be ordered since the first value
    is the primary coding.

    :param str word: the word to transform
    :param int max_length: the length of the code returned (defaults to 14);
        values are clamped to the range [4, 64], and -1 selects 64
    :returns: the Alpha SIS value(s), each zero-padded or truncated to
        exactly ``max_length`` characters
    :rtype: tuple

    >>> alpha_sis('Christopher')
    ('06401840000000', '07040184000000', '04018400000000')
    >>> alpha_sis('Niall')
    ('02500000000000',)
    >>> alpha_sis('Smith')
    ('03100000000000',)
    >>> alpha_sis('Schmidt')
    ('06310000000000',)
    """
    # Uppercase, decompose, and keep only the plain letters A-Z.
    word = unicode_normalize('NFKD', text_type(word.upper()))
    word = word.replace('ß', 'SS')
    word = ''.join(c for c in word if c in _ALPHA_SIS_LETTERS)

    # Clamp max_length to [4, 64]; -1 asks for the maximum.
    if max_length == -1:
        max_length = 64
    else:
        max_length = min(max(4, max_length), 64)

    # Special processing for initial substrings; a word with no special
    # initial gets a leading '0' instead.
    pos = 0
    for prefix, code in _ALPHA_SIS_INITIALS:
        if word.startswith(prefix):
            alpha = [code]
            pos = len(prefix)
            break
    else:
        alpha = ['0']

    # Main encoding loop over the remainder of the word.
    while pos < len(word):
        for prefix, codes in _ALPHA_SIS_BASIC:
            # startswith(prefix, pos) probes in place, without slicing a
            # fresh copy of the tail for every candidate prefix.
            if word.startswith(prefix, pos):
                # Outer loop over the alternatives, inner loop over the
                # existing codes, so the primary coding stays first.
                alpha = [head + alt for alt in codes for head in alpha]
                pos += len(prefix)
                break
        else:
            # Letter without a code: emit a placeholder so that equal
            # digits on either side of it are not treated as a doublet.
            alpha = [head + '_' for head in alpha]
            pos += 1

    # Trim doublets and placeholders.
    # NOTE(review): pos advances even after a deletion, so a run of three
    # equal characters is reduced to two rather than one.  Kept as is to
    # leave existing codes unchanged -- confirm whether this is intended.
    trimmed = []
    for code in alpha:
        pos = 1
        while pos < len(code):
            if code[pos] == code[pos - 1]:
                code = code[:pos] + code[pos + 1:]
            pos += 1
        trimmed.append(code.replace('_', ''))

    # Zero-pad or truncate every code to max_length and return a tuple.
    padding = '0' * max_length
    return tuple((code + padding)[:max_length] for code in trimmed)
|
254 | |||
260 |