| Conditions | 11 |
| Total Lines | 91 |
| Code Lines | 57 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 24 |
| CRAP Score | 11 |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when combined with a good name. Moreover, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex classes like abydos.phonetic._pt.soundex_br() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
def soundex_br(word, max_length=4, zero_pad=True):
    """Encode a word with the SoundexBR phonetic algorithm.

    The implementation follows :cite:`Marcelino:2015`.

    The word is upper-cased and normalized, everything except the letters
    A-Z is dropped, the leading letter is adjusted by a handful of
    spelling rules, and the remaining letters are mapped to digits.
    Consecutive repeats are collapsed and the '0' placeholders removed
    before the code is cut to ``max_length`` characters.

    :param str word: the word to transform
    :param int max_length: the length of the code returned (defaults to 4)
    :param bool zero_pad: pad the end of the return value with 0s to achieve
        a max_length string
    :returns: the SoundexBR code
    :rtype: str

    >>> soundex_br('Oliveira')
    'O416'
    >>> soundex_br('Almeida')
    'A453'
    >>> soundex_br('Barbosa')
    'B612'
    >>> soundex_br('Araújo')
    'A620'
    >>> soundex_br('Gonçalves')
    'G524'
    >>> soundex_br('Goncalves')
    'G524'
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '01230120022455012623010202'
    # Map each letter's code point to its digit, as str.translate expects.
    code_table = {ord(letter): digit for letter, digit in zip(alphabet, digits)}

    # Normalize (NFKD) the upper-cased input, then keep only plain A-Z so
    # that, e.g., 'Gonçalves' and 'Goncalves' encode identically.
    normalized = unicode_normalize('NFKD', text_type(word.upper()))
    word = ''.join([char for char in normalized if char in alphabet])

    head = word[:1]
    follower = word[1:2]

    # Spelling rules that decide which letter leads the code.
    if head == 'W' and follower == 'A':
        first = 'V'
    elif head == 'K' and follower in {'A', 'O', 'U'}:
        first = 'C'
    elif head == 'C' and follower in {'I', 'E'}:
        first = 'S'
    elif head == 'G' and follower in {'E', 'I'}:
        first = 'J'
    elif head == 'Y':
        first = 'I'
    elif head == 'H':
        # A leading H is skipped: the following letter leads the code and
        # the word is shifted so that letter is not encoded twice.
        first = follower
        word = word[1:]
    else:
        first = head

    # Only the letters after the leading one are turned into digits.
    encoded_tail = word[1:].translate(code_table)
    sdx = _delete_consecutive_repeats(first + encoded_tail).replace('0', '')

    if zero_pad:
        # Over-pad, then rely on the final slice to cut to size.
        sdx += '0' * max_length

    return sdx[:max_length]
|
| 129 | |||
| 135 |