Conditions | 24 |
Total Lines | 186 |
Code Lines | 59 |
Lines | 0 |
Ratio | 0 % |
Tests | 58 |
CRAP Score | 24 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
Complex classes, such as the one containing abydos.phonetic._phonix.Phonix.encode(), often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def encode(self, word, max_length=4, zero_pad=True):
    """Return the Phonix code for a word.

    The word is upper-cased, NFKD-normalized, and stripped of every
    character that is not in ``self._uc_set``. The rewrite rules in
    ``self._substitutions`` are then applied in order, the first character
    is kept (or replaced by ``'v'`` when it is in ``self._uc_vy_set``), and
    the remaining characters are mapped through ``self._trans``.
    Consecutive repeats are collapsed and ``'0'`` codes are removed before
    the result is padded and truncated.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The length of the code returned (defaults to 4). Values are
        clamped to the range [4, 64]; -1 selects the maximum of 64.
    zero_pad : bool
        Pad the end of the return value with 0s to achieve a max_length
        string

    Returns
    -------
    str
        The Phonix value. If nothing could be encoded and ``zero_pad`` is
        false, ``'0'`` is returned.

    Examples
    --------
    >>> pe = Phonix()
    >>> pe.encode('Christopher')
    'K683'
    >>> pe.encode('Niall')
    'N400'
    >>> pe.encode('Smith')
    'S530'
    >>> pe.encode('Schmidt')
    'S530'

    """

    def _start_repl(word, src, tar, post=None):
        """Replace src with tar at the start of word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        post : set
            Following characters; when given, src must be immediately
            followed by one of them for the replacement to happen

        Returns
        -------
        str
            Modified string

        """
        if post:
            if any(word.startswith(src + i) for i in post):
                return tar + word[len(src):]
            return word
        if word.startswith(src):
            return tar + word[len(src):]
        return word

    def _end_repl(word, src, tar, pre=None):
        """Replace src with tar at the end of word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        pre : set
            Preceding characters; when given, src must be immediately
            preceded by one of them for the replacement to happen

        Returns
        -------
        str
            Modified string

        """
        if pre:
            if any(word.endswith(i + src) for i in pre):
                return word[: -len(src)] + tar
            return word
        if word.endswith(src):
            return word[: -len(src)] + tar
        return word

    def _mid_repl(word, src, tar, pre=None, post=None):
        """Replace src with tar in the middle of word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        pre : set
            Preceding characters
        post : set
            Following characters

        Returns
        -------
        str
            Modified string

        """
        # A word shorter than two characters has no interior. Without this
        # guard the final return below would emit word[0] + '' + word[-1],
        # duplicating the only character of a one-letter word, and any
        # branch indexing word[0] / word[-1] would raise IndexError on ''.
        if len(word) < 2:
            return word
        if pre or post:
            if not pre:
                # Only a following context: protect the first character.
                return word[0] + _all_repl(word[1:], src, tar, pre, post)
            if not post:
                # Only a preceding context: protect the last character.
                return _all_repl(word[:-1], src, tar, pre, post) + word[-1]
            # Context on both sides already keeps the match off the edges.
            return _all_repl(word, src, tar, pre, post)
        return word[0] + _all_repl(word[1:-1], src, tar, pre, post) + word[-1]

    def _all_repl(word, src, tar, pre=None, post=None):
        """Replace src with tar anywhere in word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        pre : set
            Preceding characters
        post : set
            Following characters

        Returns
        -------
        str
            Modified string

        """
        if not (pre or post):
            return word.replace(src, tar)

        # A missing side is treated as the empty context so a single nested
        # loop covers the pre-only, post-only and pre+post cases.
        pre = pre or frozenset(('',))
        post = post or frozenset(('',))

        for i in pre:
            for j in post:
                word = word.replace(i + src + j, i + tar + j)
        return word

    # Rule tuples in self._substitutions select their helper by index:
    # 0 = start, 1 = end, 2 = middle, 3 = anywhere.
    repl_at = (_start_repl, _end_repl, _mid_repl, _all_repl)

    sdx = ''

    word = unicode_normalize('NFKD', text_type(word.upper()))
    word = word.replace('ß', 'SS')
    word = ''.join(c for c in word if c in self._uc_set)
    if word:
        for trans in self._substitutions:
            word = repl_at[trans[0]](word, *trans[1:])

    # Re-check after the rewrite rules: a rule with an empty replacement
    # can consume the whole word, and word[0] would then raise IndexError.
    if word:
        if word[0] in self._uc_vy_set:
            sdx = 'v' + word[1:].translate(self._trans)
        else:
            sdx = word[0] + word[1:].translate(self._trans)
        sdx = self._delete_consecutive_repeats(sdx)
        sdx = sdx.replace('0', '')

    # Clamp max_length to [4, 64]
    if max_length != -1:
        max_length = min(max(4, max_length), 64)
    else:
        max_length = 64

    if zero_pad:
        sdx += '0' * max_length
    if not sdx:
        sdx = '0'
    return sdx[:max_length]
368 | |||
408 |