Conditions | 21 |
Total Lines | 175 |
Code Lines | 70 |
Lines | 0 |
Ratio | 0 % |
Tests | 61 |
CRAP Score | 21 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
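Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present, also consider: Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.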
Complex classes, such as the one containing abydos.phonetic._spfc.SPFC.encode(), often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def encode(self, word):
    """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

    Parameters
    ----------
    word : str or iterable of two str
        The name to transform: either a single string in which a period
        or a space divides the first and last names, or an iterable
        (tuple, list, ...) yielding exactly the first and last names

    Returns
    -------
    str
        The SPFC value (the empty string if ``word`` is falsy)

    Raises
    ------
    AttributeError
        Word attribute must be a string with a space or period dividing the
        first and last names or a tuple/list consisting of the first and
        last names

    Examples
    --------
    >>> pe = SPFC()
    >>> pe.encode('Christopher Smith')
    '01160'
    >>> pe.encode('Christopher Schmidt')
    '01160'
    >>> pe.encode('Niall Smith')
    '01660'
    >>> pe.encode('Niall Schmidt')
    '01660'

    >>> pe.encode('L.Smith')
    '01960'
    >>> pe.encode('R.Miller')
    '65490'

    >>> pe.encode(('L', 'Smith'))
    '01960'
    >>> pe.encode(('R', 'Miller'))
    '65490'

    """

    def _raise_word_ex():
        """Raise an AttributeError describing the accepted input forms.

        Raises
        ------
        AttributeError
            Word attribute must be a string with a space or period dividing
            the first and last names or a tuple/list consisting of the
            first and last names

        """
        raise AttributeError(
            'Word attribute must be a string with a space or period '
            + 'dividing the first and last names or a tuple/list '
            + 'consisting of the first and last names'
        )

    if not word:
        return ''

    if isinstance(word, (str, text_type)):
        # A period takes precedence over a space as the name divider.
        names = word.split('.', 1)
        if len(names) != 2:
            names = word.split(' ', 1)
        if len(names) != 2:
            _raise_word_ex()
    elif hasattr(word, '__iter__'):
        # Materialise first: generators and other unsized iterables have
        # no len(), and would otherwise fail with a TypeError instead of
        # the documented AttributeError.
        names = list(word)
        if len(names) != 2:
            _raise_word_ex()
    else:
        _raise_word_ex()

    names = [
        unicode_normalize(
            'NFKD', text_type(part.strip().replace('ß', 'SS').upper())
        )
        for part in names
    ]

    # Letters dropped in step 3 (everywhere except the first position).
    dropped = {'A', 'E', 'H', 'I', 'O', 'U', 'W', 'Y'}

    def _steps_one_to_three(name):
        """Perform the first three steps of SPFC.

        Parameters
        ----------
        name : str
            Name to transform

        Returns
        -------
        str
            Transformed name

        """
        # filter out every character that is not in self._uc_set
        name = ''.join(char for char in name if char in self._uc_set)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for subst in self._substitutions:
            name = name.replace(subst[0], subst[1])

        # 2. In the name field, replace multiple letters with a single
        # letter
        name = self._delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the
        # name field.
        if name:
            name = name[0] + ''.join(
                char for char in name[1:] if char not in dropped
            )
        return name

    first, last = (_steps_one_to_three(name) for name in names)
    digits = []

    # 4. The first digit of the code is obtained using PF1 and the first
    # letter of the name field. Remove this letter after coding.
    if last:
        digits.append(last[0].translate(self._pf1))
        last = last[1:]

    # 5. Using the last letters of the name, use Table PF3 to obtain the
    # second digit of the code. Use as many letters as possible and remove
    # after coding.
    # The order of this table is significant: it mirrors the priority in
    # which the multi-letter endings are tested (e.g. STR before TR).
    suffix_digits = (
        ('STN', '8'),
        ('PRS', '8'),
        ('SN', '8'),
        ('STR', '9'),
        ('SR', '9'),
        ('TN', '9'),
        ('TD', '9'),
        ('DRS', '7'),
        ('TR', '7'),
        ('MN', '7'),
    )
    if last:
        for suffix, digit in suffix_digits:
            if last.endswith(suffix):
                digits.append(digit)
                last = last[: -len(suffix)]
                break
        else:
            # No multi-letter ending matched: code the final letter alone.
            digits.append(last[-1].translate(self._pf3))
            last = last[:-1]

    # 6. The third digit is found using Table PF2 and the first character
    # of the first name. Remove after coding.
    if first:
        digits.append(first[0].translate(self._pf2))
        first = first[1:]

    # 7. The fourth digit is found using Table PF2 and the first character
    # of the name field. If no letters remain use zero. After coding remove
    # the letter.
    # 8. The fifth digit is found in the same manner as the fourth using
    # the remaining characters of the name field if any.
    for _ in range(2):
        if last:
            digits.append(last[0].translate(self._pf2))
            last = last[1:]
        else:
            digits.append('0')

    return ''.join(digits)
250 | |||
296 |