Conditions | 116 |
Total Lines | 233 |
Code Lines | 176 |
Lines | 0 |
Ratio | 0 % |
Tests | 172 |
CRAP Score | 116 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex methods like abydos.stemmer._porter.Porter.stem() often do a lot of different things. To break such a method (and the class that owns it) down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def stem(self, word, early_english=False):
    """Return Porter stem.

    The word is lowercased and NFC-normalized, then passed through
    Porter's steps 1a, 1b, 1c, 2, 3, 4, 5a and 5b in that order. Words
    shorter than three characters (after normalization) are returned
    without stemming.

    Parameters
    ----------
    word : str
        The word to stem
    early_english : bool
        Set to True in order to remove -eth & -est (2nd & 3rd person
        singular verbal agreement suffixes)

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> stmr = Porter()
    >>> stmr.stem('reading')
    'read'
    >>> stmr.stem('suspension')
    'suspens'
    >>> stmr.stem('elusiveness')
    'elus'

    >>> stmr.stem('eateth', early_english=True)
    'eat'

    """
    # Ordered (suffix, replacement) rules for Steps 2 and 3. Within each
    # step only the FIRST matching suffix is considered, even when its
    # measure condition then fails, so longer suffixes must precede the
    # shorter suffixes they end with (e.g. 'ization' before 'ation').
    step2_rules = (
        ('ational', 'ate'),
        ('tional', 'tion'),
        ('enci', 'ence'),
        ('anci', 'ance'),
        ('izer', 'ize'),
        ('logi', 'log'),
        ('bli', 'ble'),
        ('alli', 'al'),
        ('entli', 'ent'),
        ('eli', 'e'),
        ('ousli', 'ous'),
        ('ization', 'ize'),
        ('ation', 'ate'),
        ('ator', 'ate'),
        ('alism', 'al'),
        ('iveness', 'ive'),
        ('fulness', 'ful'),
        ('ousness', 'ous'),
        ('aliti', 'al'),
        ('iviti', 'ive'),
        ('biliti', 'ble'),
    )
    step3_rules = (
        ('icate', 'ic'),
        ('ative', ''),
        ('alize', 'al'),
        ('iciti', 'ic'),
        ('ical', 'ic'),
        ('ful', ''),
        ('ness', ''),
    )
    # Ordered suffixes for Step 4 (same first-match-wins rule as above;
    # 'ement' must precede 'ment', which must precede 'ent').
    step4_suffixes = (
        'al',
        'ance',
        'ence',
        'er',
        'ic',
        'able',
        'ible',
        'ant',
        'ement',
        'ment',
        'ent',
        'sion',
        'tion',
        'ou',
        'ism',
        'ate',
        'iti',
        'ous',
        'ive',
        'ize',
    )

    # lowercase, normalize, and compose
    word = normalize('NFC', text_type(word.lower()))

    # Words shorter than 3 characters are returned unstemmed
    if len(word) < 3:
        return word

    # Re-map consonantal y to Y (Y will be C, y will be V).
    # The scan is sequential on purpose: a 'y' that has just been turned
    # into 'Y' is what the following character sees as its predecessor.
    chars = list(word)
    if chars[0] == 'y':
        chars[0] = 'Y'
    for i in range(1, len(chars)):
        if chars[i] == 'y' and chars[i - 1] in self._vowels:
            chars[i] = 'Y'
    word = ''.join(chars)

    # Step 1a: -sses -> -ss, -ies -> -i, -ss kept, other -s dropped
    if word[-1] == 's':
        if word[-4:] == 'sses' or word[-3:] == 'ies':
            word = word[:-2]
        elif word[-2:] != 'ss':
            word = word[:-1]

    # Step 1b
    step1b_flag = False
    if word[-3:] == 'eed':
        # -eed -> -ee, only when the part before -eed has measure > 0
        if self._m_degree(word[:-3]) > 0:
            word = word[:-1]
    else:
        if word[-2:] == 'ed':
            cut = 2
        elif word[-3:] == 'ing':
            cut = 3
        elif early_english and word[-3:] in ('est', 'eth'):
            cut = 3
        else:
            cut = 0
        # The suffix is only removed when what remains contains a vowel
        if cut and self._has_vowel(word[:-cut]):
            word = word[:-cut]
            step1b_flag = True

    # Step 1b clean-up, applied only if a suffix was actually removed
    if step1b_flag:
        if word[-2:] in {'at', 'bl', 'iz'}:
            word += 'e'
        elif self._ends_in_doubled_cons(word) and word[-1] not in {
            'l',
            's',
            'z',
        }:
            word = word[:-1]
        elif self._m_degree(word) == 1 and self._ends_in_cvc(word):
            word += 'e'

    # Step 1c: terminal y -> i when the rest of the word has a vowel
    if word[-1] in {'Y', 'y'} and self._has_vowel(word[:-1]):
        word = word[:-1] + 'i'

    # Steps 2 and 3: replace the first matching suffix, provided the part
    # of the word before that suffix has measure > 0
    for rules in (step2_rules, step3_rules):
        for suffix, replacement in rules:
            if word.endswith(suffix):
                base = word[: -len(suffix)]
                if self._m_degree(base) > 0:
                    word = base + replacement
                break

    # Step 4: drop the first matching suffix when the remainder has
    # measure > 1
    for suffix in step4_suffixes:
        if word.endswith(suffix):
            # For -sion/-tion only 'ion' is removed; the s/t stays in the
            # remainder and is included when the measure is taken.
            cut = 3 if suffix in ('sion', 'tion') else len(suffix)
            if self._m_degree(word[:-cut]) > 1:
                word = word[:-cut]
            break

    # Step 5a: drop a final -e when the remainder has measure > 1, or
    # measure == 1 and does not end in a CVC pattern
    if word[-1] == 'e':
        base = word[:-1]
        measure = self._m_degree(base)
        if measure > 1 or (measure == 1 and not self._ends_in_cvc(base)):
            word = base

    # Step 5b: -ll -> -l when the word has measure > 1
    if word[-2:] == 'll' and self._m_degree(word) > 1:
        word = word[:-1]

    # Change 'Y' back to 'y' if it survived stemming
    return word.replace('Y', 'y')
|
372 | |||
412 |