| Conditions | 21 |
| Total Lines | 147 |
| Code Lines | 81 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when they are combined with a good name. Besides, if your method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex classes like abydos.phonetic.spfc.spfc() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
def spfc(word):
    """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

    Standardized Phonetic Frequency Code is roughly Soundex-like.
    This implementation is based on page 19-21 of :cite:`Moore:1977`.

    The input must supply exactly two names, first name then last name,
    either as one string divided by a period or a space (a period takes
    precedence), or as an iterable of two strings.

    :param word: the word to transform: a string of the form
        ``'First Last'`` or ``'F.Last'``, or a two-item iterable
        ``(first, last)``
    :returns: the SPFC value; the empty string if ``word`` is falsy
    :rtype: str
    :raises AttributeError: if ``word`` cannot be divided into exactly two
        names

    >>> spfc('Christopher Smith')
    '01160'
    >>> spfc('Christopher Schmidt')
    '01160'
    >>> spfc('Niall Smith')
    '01660'
    >>> spfc('Niall Schmidt')
    '01660'

    >>> spfc('L.Smith')
    '01960'
    >>> spfc('R.Miller')
    '65490'

    >>> spfc(('L', 'Smith'))
    '01960'
    >>> spfc(('R', 'Miller'))
    '65490'
    """
    # Translation tables (code point -> digit) for use with str.translate.
    # A letter that is absent from a table (e.g. 'Y' in PF1 and PF2) is
    # left unchanged by translate and so appears verbatim in the code.
    _pf1 = dict(zip((ord(_) for _ in 'SZCKQVFPUWABLORDHIEMNXGJT'),
                    '0011112222334445556666777'))
    _pf2 = dict(zip((ord(_) for _ in 'SZCKQFPXABORDHIMNGJTUVWEL'),
                    '0011122233445556677788899'))
    _pf3 = dict(zip((ord(_) for _ in 'BCKQVDTFLPGJXMNRSZAEHIOUWY'),
                    '00000112223334456677777777'))

    _substitutions = (('DK', 'K'), ('DT', 'T'), ('SC', 'S'), ('KN', 'N'),
                      ('MN', 'N'))

    # Multi-letter endings coded as a unit in step 5.  Order matters: the
    # first matching suffix wins, so each three-letter ending is listed
    # before any two-letter ending it contains (STN before TN, STR before
    # SR and TR).
    _suffix_codes = (('STN', '8'), ('PRS', '8'), ('SN', '8'),
                     ('STR', '9'), ('SR', '9'), ('TN', '9'), ('TD', '9'),
                     ('DRS', '7'), ('TR', '7'), ('MN', '7'))

    _letters = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    _dropped = frozenset('AEHIOUWY')

    if not word:
        return ''

    if isinstance(word, (str, text_type)):
        # A period separator takes precedence over a space.
        names = word.split('.', 1)
        if len(names) != 2:
            names = word.split(' ', 1)
    elif hasattr(word, '__iter__'):
        # Materialize once so that unsized iterables (iterators,
        # generators) are validated like tuples and lists instead of
        # failing inside len().
        names = list(word)
    else:
        names = []

    if len(names) != 2:
        raise AttributeError('word attribute must be a string with a space ' +
                             'or period dividing the first and last names ' +
                             'or a tuple/list consisting of the first and ' +
                             'last names')

    names = [unicode_normalize('NFKD', text_type(_.strip()
                                                 .replace('ß', 'SS')
                                                 .upper()))
             for _ in names]

    def _steps_one_to_three(name):
        """Perform the first three steps of SPFC on a single name."""
        # filter out non A-Z
        name = ''.join(_ for _ in name if _ in _letters)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for old, new in _substitutions:
            name = name.replace(old, new)

        # 2. In the name field, replace multiple letters with a single letter
        name = _delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the name
        # field.
        if name:
            name = name[0] + ''.join(_ for _ in name[1:]
                                     if _ not in _dropped)
        return name

    first, last = (_steps_one_to_three(_) for _ in names)
    digits = []

    # 4. The first digit of the code is obtained using PF1 and the first letter
    # of the name field. Remove this letter after coding.
    if last:
        digits.append(last[0].translate(_pf1))
        last = last[1:]

    # 5. Using the last letters of the name, use Table PF3 to obtain the
    # second digit of the code. Use as many letters as possible and remove
    # after coding.
    if last:
        for suffix, digit in _suffix_codes:
            if last.endswith(suffix):
                digits.append(digit)
                last = last[:-len(suffix)]
                break
        else:
            # No multi-letter ending applies: code the final letter alone.
            digits.append(last[-1].translate(_pf3))
            last = last[:-1]

    # 6. The third digit is found using Table PF2 and the first character of
    # the first name.
    if first:
        digits.append(first[0].translate(_pf2))

    # 7. The fourth digit is found using Table PF2 and the first character of
    # the name field. If no letters remain use zero. After coding remove the
    # letter.
    # 8. The fifth digit is found in the same manner as the fourth using the
    # remaining characters of the name field if any.
    for _ in range(2):
        if last:
            digits.append(last[0].translate(_pf2))
            last = last[1:]
        else:
            digits.append('0')

    return ''.join(digits)
||
| 184 | |||
| 189 |