Conditions | 21 |
Total Lines | 162 |
Code Lines | 70 |
Lines | 0 |
Ratio | 0 % |
Tests | 61 |
CRAP Score | 21 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex classes like abydos.phonetic._spfc.SPFC, whose encode() method is shown below, often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def encode(self, word):
    """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

    The word must supply exactly two names, first name then last name.
    The code is assembled from the last name (via the PF1, PF3 and PF2
    translation tables held on ``self``) and the first letter of the first
    name (via PF2).

    Args:
        word (str or iterable): Either a string whose first and last names
            are divided by a period or a space, or an iterable (tuple,
            list, iterator, ...) yielding exactly the first and last names

    Returns:
        str: The SPFC value, or an empty string if ``word`` is falsy

    Raises:
        AttributeError: Word attribute must be a string with a space or
            period dividing the first and last names or a tuple/list
            consisting of the first and last names

    Examples:
        >>> pe = SPFC()
        >>> pe.encode('Christopher Smith')
        '01160'
        >>> pe.encode('Christopher Schmidt')
        '01160'
        >>> pe.encode('Niall Smith')
        '01660'
        >>> pe.encode('Niall Schmidt')
        '01660'

        >>> pe.encode('L.Smith')
        '01960'
        >>> pe.encode('R.Miller')
        '65490'

        >>> pe.encode(('L', 'Smith'))
        '01960'
        >>> pe.encode(('R', 'Miller'))
        '65490'

    """

    def _raise_word_ex():
        """Raise an AttributeError.

        Raises:
            AttributeError: Word attribute must be a string with a space or
                period dividing the first and last names or a tuple/list
                consisting of the first and last names

        """
        raise AttributeError(
            'Word attribute must be a string with a space or period '
            'dividing the first and last names or a tuple/list '
            'consisting of the first and last names'
        )

    def _steps_one_to_three(name):
        """Perform the first three steps of SPFC.

        Args:
            name (str): Name to transform

        Returns:
            str: Transformed name

        """
        # filter out non A-Z
        name = ''.join(char for char in name if char in self._uc_set)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for old, new in self._substitutions:
            name = name.replace(old, new)

        # 2. In the name field, replace multiple letters with a single
        # letter
        name = self._delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the
        # name field.
        if name:
            name = name[0] + ''.join(
                char
                for char in name[1:]
                if char not in {'A', 'E', 'H', 'I', 'O', 'U', 'W', 'Y'}
            )
        return name

    if not word:
        return ''

    names = []
    if isinstance(word, (str, text_type)):
        # A period takes precedence over a space as the name divider.
        names = word.split('.', 1)
        if len(names) != 2:
            names = word.split(' ', 1)
    elif hasattr(word, '__iter__'):
        # Materialize before measuring: iterators and generators have no
        # len(), and calling it would leak a TypeError instead of the
        # documented AttributeError.
        names = list(word)
    else:
        _raise_word_ex()
    if len(names) != 2:
        _raise_word_ex()

    # Expand sharp S, upper-case and NFKD-normalize each name before
    # running the letter-level steps.
    names = [
        unicode_normalize(
            'NFKD', text_type(name.strip().replace('ß', 'SS').upper())
        )
        for name in names
    ]
    given, surname = [_steps_one_to_three(name) for name in names]

    code = ''

    # 4. The first digit of the code is obtained using PF1 and the first
    # letter of the name field. Remove this letter after coding.
    if surname:
        code += surname[0].translate(self._pf1)
        surname = surname[1:]

    # 5. Using the last letters of the name, use Table PF3 to obtain the
    # second digit of the code. Use as many letters as possible and remove
    # after coding.
    # The first matching suffix wins, so the order below is significant:
    # 'STN' must precede 'TN' and 'STR' must precede 'TR'.
    pf3_suffixes = (
        ('STN', '8'),
        ('PRS', '8'),
        ('SN', '8'),
        ('STR', '9'),
        ('SR', '9'),
        ('TN', '9'),
        ('TD', '9'),
        ('DRS', '7'),
        ('TR', '7'),
        ('MN', '7'),
    )
    if surname:
        for suffix, digit in pf3_suffixes:
            if surname.endswith(suffix):
                code += digit
                surname = surname[: -len(suffix)]
                break
        else:
            # No multi-letter suffix applies: code the last letter alone.
            code += surname[-1].translate(self._pf3)
            surname = surname[:-1]

    # 6. The third digit is found using Table PF2 and the first character
    # of the first name. Remove after coding.
    if given:
        code += given[0].translate(self._pf2)
        given = given[1:]

    # 7. The fourth digit is found using Table PF2 and the first character
    # of the name field. If no letters remain use zero. After coding remove
    # the letter.
    # 8. The fifth digit is found in the same manner as the fourth using
    # the remaining characters of the name field if any.
    for _ in range(2):
        if surname:
            code += surname[0].translate(self._pf2)
            surname = surname[1:]
        else:
            code += '0'

    return code
|
237 | |||
278 |