Conditions | 116 |
Total Lines | 236 |
Code Lines | 176 |
Lines | 0 |
Ratio | 0 % |
Tests | 166 |
CRAP Score | 116 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
Complex classes like abydos.stemmer._porter.Porter, whose stem() method is listed below, often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # Copyright 2014-2020 by Christopher C. Little. |
||
def stem(self, word: str) -> str:
    """Return Porter stem.

    The word is lowercased and NFC-normalized, consonantal ``y`` is
    temporarily re-mapped to ``Y``, and then suffix-stripping steps
    1a, 1b, 1c, 2, 3, 4, 5a and 5b are applied in that order. Words of
    fewer than three characters are returned without stemming.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> stmr = Porter()
    >>> stmr.stem('reading')
    'read'
    >>> stmr.stem('suspension')
    'suspens'
    >>> stmr.stem('elusiveness')
    'elus'

    >>> stmr = Porter(early_english=True)
    >>> stmr.stem('eateth')
    'eat'


    .. versionadded:: 0.1.0
    .. versionchanged:: 0.3.6
        Encapsulated in class

    """
    # lowercase, normalize, and compose
    word = normalize('NFC', word.lower())

    # Words of one or two characters are returned unstemmed
    if len(word) < 3:
        return word

    # Re-map consonantal y to Y (Y will be C, y will be V)
    if word[0] == 'y':
        word = 'Y' + word[1:]
    for i in range(1, len(word)):
        if word[i] == 'y' and word[i - 1] in self._vowels:
            word = word[:i] + 'Y' + word[i + 1 :]

    # Step 1a: plural endings
    if word[-1] == 's':
        if word[-4:] == 'sses':
            # sses -> ss
            word = word[:-2]
        elif word[-3:] == 'ies':
            # ies -> i
            word = word[:-2]
        elif word[-2:] == 'ss':
            # ss is left untouched
            pass
        else:
            # any other trailing s is dropped
            word = word[:-1]

    # Step 1b: -eed, -ed, -ing (and -est, -eth in early English mode)
    # step1b_flag records that a suffix was actually removed, which
    # triggers the clean-up rules below.
    step1b_flag = False
    if word[-3:] == 'eed':
        if self._m_degree(word[:-3]) > 0:
            # eed -> ee
            word = word[:-1]
    elif word[-2:] == 'ed':
        if self._has_vowel(word[:-2]):
            word = word[:-2]
            step1b_flag = True
    elif word[-3:] == 'ing':
        if self._has_vowel(word[:-3]):
            word = word[:-3]
            step1b_flag = True
    elif self._early_english:
        if word[-3:] == 'est':
            if self._has_vowel(word[:-3]):
                word = word[:-3]
                step1b_flag = True
        elif word[-3:] == 'eth':
            if self._has_vowel(word[:-3]):
                word = word[:-3]
                step1b_flag = True

    if step1b_flag:
        if word[-2:] in {'at', 'bl', 'iz'}:
            # restore the e that the removed suffix had displaced
            word += 'e'
        elif self._ends_in_doubled_cons(word) and word[-1] not in {
            'l',
            's',
            'z',
        }:
            # undouble the final consonant (except l, s, z)
            word = word[:-1]
        elif self._m_degree(word) == 1 and self._ends_in_cvc(word):
            word += 'e'

    # Step 1c: terminal y/Y -> i when the stem contains a vowel
    if word[-1] in {'Y', 'y'} and self._has_vowel(word[:-1]):
        word = word[:-1] + 'i'

    # Step 2: the candidate suffixes are grouped by the penultimate
    # letter of the word, so at most one group is examined.
    if len(word) > 1:
        if word[-2] == 'a':
            if word[-7:] == 'ational':
                if self._m_degree(word[:-7]) > 0:
                    # ational -> ate
                    word = word[:-5] + 'e'
            elif word[-6:] == 'tional':
                if self._m_degree(word[:-6]) > 0:
                    # tional -> tion
                    word = word[:-2]
        elif word[-2] == 'c':
            if word[-4:] in {'enci', 'anci'}:
                if self._m_degree(word[:-4]) > 0:
                    # enci -> ence, anci -> ance
                    word = word[:-1] + 'e'
        elif word[-2] == 'e':
            if word[-4:] == 'izer':
                if self._m_degree(word[:-4]) > 0:
                    # izer -> ize
                    word = word[:-1]
        elif word[-2] == 'g':
            if word[-4:] == 'logi':
                if self._m_degree(word[:-4]) > 0:
                    # logi -> log
                    word = word[:-1]
        elif word[-2] == 'l':
            if word[-3:] == 'bli':
                if self._m_degree(word[:-3]) > 0:
                    # bli -> ble
                    word = word[:-1] + 'e'
            elif word[-4:] == 'alli':
                if self._m_degree(word[:-4]) > 0:
                    # alli -> al
                    word = word[:-2]
            elif word[-5:] == 'entli':
                if self._m_degree(word[:-5]) > 0:
                    # entli -> ent
                    word = word[:-2]
            elif word[-3:] == 'eli':
                if self._m_degree(word[:-3]) > 0:
                    # eli -> e
                    word = word[:-2]
            elif word[-5:] == 'ousli':
                if self._m_degree(word[:-5]) > 0:
                    # ousli -> ous
                    word = word[:-2]
        elif word[-2] == 'o':
            if word[-7:] == 'ization':
                if self._m_degree(word[:-7]) > 0:
                    # ization -> ize
                    word = word[:-5] + 'e'
            elif word[-5:] == 'ation':
                if self._m_degree(word[:-5]) > 0:
                    # ation -> ate
                    word = word[:-3] + 'e'
            elif word[-4:] == 'ator':
                if self._m_degree(word[:-4]) > 0:
                    # ator -> ate
                    word = word[:-2] + 'e'
        elif word[-2] == 's':
            if word[-5:] == 'alism':
                if self._m_degree(word[:-5]) > 0:
                    # alism -> al
                    word = word[:-3]
            elif word[-7:] in {'iveness', 'fulness', 'ousness'}:
                if self._m_degree(word[:-7]) > 0:
                    # drop the trailing ness
                    word = word[:-4]
        elif word[-2] == 't':
            if word[-5:] == 'aliti':
                if self._m_degree(word[:-5]) > 0:
                    # aliti -> al
                    word = word[:-3]
            elif word[-5:] == 'iviti':
                if self._m_degree(word[:-5]) > 0:
                    # iviti -> ive
                    word = word[:-3] + 'e'
            elif word[-6:] == 'biliti':
                if self._m_degree(word[:-6]) > 0:
                    # biliti -> ble
                    word = word[:-5] + 'le'

    # Step 3
    # NOTE: this must be an equality test. A substring test
    # (``in 'icate'``) would also admit short tails such as 'ate' or
    # 'ic' into this branch.
    if word[-5:] == 'icate':
        if self._m_degree(word[:-5]) > 0:
            # icate -> ic
            word = word[:-3]
    elif word[-5:] == 'ative':
        if self._m_degree(word[:-5]) > 0:
            word = word[:-5]
    elif word[-5:] in {'alize', 'iciti'}:
        if self._m_degree(word[:-5]) > 0:
            # alize -> al, iciti -> ic
            word = word[:-3]
    elif word[-4:] == 'ical':
        if self._m_degree(word[:-4]) > 0:
            # ical -> ic
            word = word[:-2]
    elif word[-3:] == 'ful':
        if self._m_degree(word[:-3]) > 0:
            word = word[:-3]
    elif word[-4:] == 'ness':
        if self._m_degree(word[:-4]) > 0:
            word = word[:-4]

    # Step 4: remove the suffix outright when the remaining stem has
    # measure greater than 1
    if word[-2:] == 'al':
        if self._m_degree(word[:-2]) > 1:
            word = word[:-2]
    elif word[-4:] in {'ance', 'ence'}:
        if self._m_degree(word[:-4]) > 1:
            word = word[:-4]
    elif word[-2:] in {'er', 'ic'}:
        if self._m_degree(word[:-2]) > 1:
            word = word[:-2]
    elif word[-4:] in {'able', 'ible'}:
        if self._m_degree(word[:-4]) > 1:
            word = word[:-4]
    elif word[-3:] == 'ant':
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]
    elif word[-5:] == 'ement':
        if self._m_degree(word[:-5]) > 1:
            word = word[:-5]
    elif word[-4:] == 'ment':
        if self._m_degree(word[:-4]) > 1:
            word = word[:-4]
    elif word[-3:] == 'ent':
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]
    elif word[-4:] in {'sion', 'tion'}:
        # Only 'ion' is removed; the preceding s/t stays in the stem
        # and is counted when measuring it.
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]
    elif word[-2:] == 'ou':
        if self._m_degree(word[:-2]) > 1:
            word = word[:-2]
    elif word[-3:] in {'ism', 'ate', 'iti', 'ous', 'ive', 'ize'}:
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]

    # Step 5a: drop a final e when the stem's measure is > 1, or when
    # it is exactly 1 and the stem does not end consonant-vowel-consonant
    if word[-1] == 'e':
        stem_measure = self._m_degree(word[:-1])
        if stem_measure > 1:
            word = word[:-1]
        elif stem_measure == 1 and not self._ends_in_cvc(word[:-1]):
            word = word[:-1]

    # Step 5b: ll -> l when the measure is greater than 1
    if word[-2:] == 'll' and self._m_degree(word) > 1:
        word = word[:-1]

    # Change 'Y' back to 'y' if it survived stemming
    word = word.replace('Y', 'y')

    return word
400 | 1 | ||
406 |