| Conditions | 127 |
| Total Lines | 285 |
| Code Lines | 218 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 187 |
| CRAP Score | 127 |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
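Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present, also consider: Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.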
Complex methods like abydos.stemmer._Porter2.Porter2.stem() usually belong to classes that do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
| 87 | 1 | def stem(self, word, early_english=False): |
|
| 88 | """Return the Porter2 (Snowball English) stem. |
||
| 89 | |||
| 90 | Args: |
||
| 91 | word (str): The word to stem |
||
| 92 | early_english (bool): Set to True in order to remove -eth & -est |
||
| 93 | (2nd & 3rd person singular verbal agreement suffixes) |
||
| 94 | |||
| 95 | Returns: |
||
| 96 | str: Word stem |
||
| 97 | |||
| 98 | Examples: |
||
| 99 | >>> stmr = Porter2() |
||
| 100 | >>> stmr.stem('reading') |
||
| 101 | 'read' |
||
| 102 | >>> stmr.stem('suspension') |
||
| 103 | 'suspens' |
||
| 104 | >>> stmr.stem('elusiveness') |
||
| 105 | 'elus' |
||
| 106 | |||
| 107 | >>> stmr.stem('eateth', early_english=True) |
||
| 108 | 'eat' |
||
| 109 | |||
| 110 | """ |
||
| 111 | # lowercase, normalize, and compose |
||
| 112 | 1 | word = normalize('NFC', text_type(word.lower())) |
|
| 113 | # replace apostrophe-like characters with U+0027, per |
||
| 114 | # http://snowball.tartarus.org/texts/apostrophe.html |
||
| 115 | 1 | word = word.replace('’', '\'') |
|
| 116 | 1 | word = word.replace('’', '\'') |
|
| 117 | |||
| 118 | # Exceptions 1 |
||
| 119 | 1 | if word in self._exception1dict: |
|
| 120 | 1 | return self._exception1dict[word] |
|
| 121 | 1 | elif word in self._exception1set: |
|
| 122 | 1 | return word |
|
| 123 | |||
| 124 | # Return word if stem is shorter than 3 |
||
| 125 | 1 | if len(word) < 3: |
|
| 126 | 1 | return word |
|
| 127 | |||
| 128 | # Remove initial ', if present. |
||
| 129 | 1 | while word and word[0] == '\'': |
|
| 130 | 1 | word = word[1:] |
|
| 131 | # Return word if stem is shorter than 2 |
||
| 132 | 1 | if len(word) < 2: |
|
| 133 | 1 | return word |
|
| 134 | |||
| 135 | # Re-map vocalic Y to y (Y will be C, y will be V) |
||
| 136 | 1 | if word[0] == 'y': |
|
| 137 | 1 | word = 'Y' + word[1:] |
|
| 138 | 1 | for i in range(1, len(word)): |
|
| 139 | 1 | if word[i] == 'y' and word[i - 1] in self._vowels: |
|
| 140 | 1 | word = word[:i] + 'Y' + word[i + 1 :] |
|
| 141 | |||
| 142 | 1 | r1_start = self._sb_r1(word, self._r1_prefixes) |
|
| 143 | 1 | r2_start = self._sb_r2(word, self._r1_prefixes) |
|
| 144 | |||
| 145 | # Step 0 |
||
| 146 | 1 | if word[-3:] == '\'s\'': |
|
| 147 | 1 | word = word[:-3] |
|
| 148 | 1 | elif word[-2:] == '\'s': |
|
| 149 | 1 | word = word[:-2] |
|
| 150 | 1 | elif word[-1:] == '\'': |
|
| 151 | 1 | word = word[:-1] |
|
| 152 | # Return word if stem is shorter than 2 |
||
| 153 | 1 | if len(word) < 3: |
|
| 154 | 1 | return word |
|
| 155 | |||
| 156 | # Step 1a |
||
| 157 | 1 | if word[-4:] == 'sses': |
|
| 158 | 1 | word = word[:-2] |
|
| 159 | 1 | elif word[-3:] in {'ied', 'ies'}: |
|
| 160 | 1 | if len(word) > 4: |
|
| 161 | 1 | word = word[:-2] |
|
| 162 | else: |
||
| 163 | 1 | word = word[:-1] |
|
| 164 | 1 | elif word[-2:] in {'us', 'ss'}: |
|
| 165 | 1 | pass |
|
| 166 | 1 | elif word[-1] == 's': |
|
| 167 | 1 | if self._sb_has_vowel(word[:-2]): |
|
| 168 | 1 | word = word[:-1] |
|
| 169 | |||
| 170 | # Exceptions 2 |
||
| 171 | 1 | if word in self._exception2set: |
|
| 172 | 1 | return word |
|
| 173 | |||
| 174 | # Step 1b |
||
| 175 | 1 | step1b_flag = False |
|
| 176 | 1 | if word[-5:] == 'eedly': |
|
| 177 | 1 | if len(word[r1_start:]) >= 5: |
|
| 178 | 1 | word = word[:-3] |
|
| 179 | 1 | elif word[-5:] == 'ingly': |
|
| 180 | 1 | if self._sb_has_vowel(word[:-5]): |
|
| 181 | 1 | word = word[:-5] |
|
| 182 | 1 | step1b_flag = True |
|
| 183 | 1 | elif word[-4:] == 'edly': |
|
| 184 | 1 | if self._sb_has_vowel(word[:-4]): |
|
| 185 | 1 | word = word[:-4] |
|
| 186 | 1 | step1b_flag = True |
|
| 187 | 1 | elif word[-3:] == 'eed': |
|
| 188 | 1 | if len(word[r1_start:]) >= 3: |
|
| 189 | 1 | word = word[:-1] |
|
| 190 | 1 | elif word[-3:] == 'ing': |
|
| 191 | 1 | if self._sb_has_vowel(word[:-3]): |
|
| 192 | 1 | word = word[:-3] |
|
| 193 | 1 | step1b_flag = True |
|
| 194 | 1 | elif word[-2:] == 'ed': |
|
| 195 | 1 | if self._sb_has_vowel(word[:-2]): |
|
| 196 | 1 | word = word[:-2] |
|
| 197 | 1 | step1b_flag = True |
|
| 198 | 1 | elif early_english: |
|
| 199 | 1 | if word[-3:] == 'est': |
|
| 200 | 1 | if self._sb_has_vowel(word[:-3]): |
|
| 201 | 1 | word = word[:-3] |
|
| 202 | 1 | step1b_flag = True |
|
| 203 | 1 | elif word[-3:] == 'eth': |
|
| 204 | 1 | if self._sb_has_vowel(word[:-3]): |
|
| 205 | 1 | word = word[:-3] |
|
| 206 | 1 | step1b_flag = True |
|
| 207 | |||
| 208 | 1 | if step1b_flag: |
|
| 209 | 1 | if word[-2:] in {'at', 'bl', 'iz'}: |
|
| 210 | 1 | word += 'e' |
|
| 211 | 1 | elif word[-2:] in self._doubles: |
|
| 212 | 1 | word = word[:-1] |
|
| 213 | 1 | elif self._sb_short_word(word, self._r1_prefixes): |
|
| 214 | 1 | word += 'e' |
|
| 215 | |||
| 216 | # Step 1c |
||
| 217 | 1 | if ( |
|
| 218 | len(word) > 2 |
||
| 219 | and word[-1] in {'Y', 'y'} |
||
| 220 | and word[-2] not in self._vowels |
||
| 221 | ): |
||
| 222 | 1 | word = word[:-1] + 'i' |
|
| 223 | |||
| 224 | # Step 2 |
||
| 225 | 1 | if word[-2] == 'a': |
|
| 226 | 1 | if word[-7:] == 'ational': |
|
| 227 | 1 | if len(word[r1_start:]) >= 7: |
|
| 228 | 1 | word = word[:-5] + 'e' |
|
| 229 | 1 | elif word[-6:] == 'tional': |
|
| 230 | 1 | if len(word[r1_start:]) >= 6: |
|
| 231 | 1 | word = word[:-2] |
|
| 232 | 1 | elif word[-2] == 'c': |
|
| 233 | 1 | if word[-4:] in {'enci', 'anci'}: |
|
| 234 | 1 | if len(word[r1_start:]) >= 4: |
|
| 235 | 1 | word = word[:-1] + 'e' |
|
| 236 | 1 | elif word[-2] == 'e': |
|
| 237 | 1 | if word[-4:] == 'izer': |
|
| 238 | 1 | if len(word[r1_start:]) >= 4: |
|
| 239 | 1 | word = word[:-1] |
|
| 240 | 1 | elif word[-2] == 'g': |
|
| 241 | 1 | if word[-3:] == 'ogi': |
|
| 242 | 1 | if ( |
|
| 243 | r1_start >= 1 |
||
| 244 | and len(word[r1_start:]) >= 3 |
||
| 245 | and word[-4] == 'l' |
||
| 246 | ): |
||
| 247 | 1 | word = word[:-1] |
|
| 248 | 1 | elif word[-2] == 'l': |
|
| 249 | 1 | if word[-6:] == 'lessli': |
|
| 250 | 1 | if len(word[r1_start:]) >= 6: |
|
| 251 | 1 | word = word[:-2] |
|
| 252 | 1 | elif word[-5:] in {'entli', 'fulli', 'ousli'}: |
|
| 253 | 1 | if len(word[r1_start:]) >= 5: |
|
| 254 | 1 | word = word[:-2] |
|
| 255 | 1 | elif word[-4:] == 'abli': |
|
| 256 | 1 | if len(word[r1_start:]) >= 4: |
|
| 257 | 1 | word = word[:-1] + 'e' |
|
| 258 | 1 | elif word[-4:] == 'alli': |
|
| 259 | 1 | if len(word[r1_start:]) >= 4: |
|
| 260 | 1 | word = word[:-2] |
|
| 261 | 1 | elif word[-3:] == 'bli': |
|
| 262 | 1 | if len(word[r1_start:]) >= 3: |
|
| 263 | 1 | word = word[:-1] + 'e' |
|
| 264 | 1 | elif word[-2:] == 'li': |
|
| 265 | 1 | if ( |
|
| 266 | r1_start >= 1 |
||
| 267 | and len(word[r1_start:]) >= 2 |
||
| 268 | and word[-3] in self._li |
||
| 269 | ): |
||
| 270 | 1 | word = word[:-2] |
|
| 271 | 1 | elif word[-2] == 'o': |
|
| 272 | 1 | if word[-7:] == 'ization': |
|
| 273 | 1 | if len(word[r1_start:]) >= 7: |
|
| 274 | 1 | word = word[:-5] + 'e' |
|
| 275 | 1 | elif word[-5:] == 'ation': |
|
| 276 | 1 | if len(word[r1_start:]) >= 5: |
|
| 277 | 1 | word = word[:-3] + 'e' |
|
| 278 | 1 | elif word[-4:] == 'ator': |
|
| 279 | 1 | if len(word[r1_start:]) >= 4: |
|
| 280 | 1 | word = word[:-2] + 'e' |
|
| 281 | 1 | elif word[-2] == 's': |
|
| 282 | 1 | if word[-7:] in {'fulness', 'ousness', 'iveness'}: |
|
| 283 | 1 | if len(word[r1_start:]) >= 7: |
|
| 284 | 1 | word = word[:-4] |
|
| 285 | 1 | elif word[-5:] == 'alism': |
|
| 286 | 1 | if len(word[r1_start:]) >= 5: |
|
| 287 | 1 | word = word[:-3] |
|
| 288 | 1 | elif word[-2] == 't': |
|
| 289 | 1 | if word[-6:] == 'biliti': |
|
| 290 | 1 | if len(word[r1_start:]) >= 6: |
|
| 291 | 1 | word = word[:-5] + 'le' |
|
| 292 | 1 | elif word[-5:] == 'aliti': |
|
| 293 | 1 | if len(word[r1_start:]) >= 5: |
|
| 294 | 1 | word = word[:-3] |
|
| 295 | 1 | elif word[-5:] == 'iviti': |
|
| 296 | 1 | if len(word[r1_start:]) >= 5: |
|
| 297 | 1 | word = word[:-3] + 'e' |
|
| 298 | |||
| 299 | # Step 3 |
||
| 300 | 1 | if word[-7:] == 'ational': |
|
| 301 | 1 | if len(word[r1_start:]) >= 7: |
|
| 302 | 1 | word = word[:-5] + 'e' |
|
| 303 | 1 | elif word[-6:] == 'tional': |
|
| 304 | 1 | if len(word[r1_start:]) >= 6: |
|
| 305 | 1 | word = word[:-2] |
|
| 306 | 1 | elif word[-5:] in {'alize', 'icate', 'iciti'}: |
|
| 307 | 1 | if len(word[r1_start:]) >= 5: |
|
| 308 | 1 | word = word[:-3] |
|
| 309 | 1 | elif word[-5:] == 'ative': |
|
| 310 | 1 | if len(word[r2_start:]) >= 5: |
|
| 311 | 1 | word = word[:-5] |
|
| 312 | 1 | elif word[-4:] == 'ical': |
|
| 313 | 1 | if len(word[r1_start:]) >= 4: |
|
| 314 | 1 | word = word[:-2] |
|
| 315 | 1 | elif word[-4:] == 'ness': |
|
| 316 | 1 | if len(word[r1_start:]) >= 4: |
|
| 317 | 1 | word = word[:-4] |
|
| 318 | 1 | elif word[-3:] == 'ful': |
|
| 319 | 1 | if len(word[r1_start:]) >= 3: |
|
| 320 | 1 | word = word[:-3] |
|
| 321 | |||
| 322 | # Step 4 |
||
| 323 | 1 | for suffix in ( |
|
| 324 | 'ement', |
||
| 325 | 'ance', |
||
| 326 | 'ence', |
||
| 327 | 'able', |
||
| 328 | 'ible', |
||
| 329 | 'ment', |
||
| 330 | 'ant', |
||
| 331 | 'ent', |
||
| 332 | 'ism', |
||
| 333 | 'ate', |
||
| 334 | 'iti', |
||
| 335 | 'ous', |
||
| 336 | 'ive', |
||
| 337 | 'ize', |
||
| 338 | 'al', |
||
| 339 | 'er', |
||
| 340 | 'ic', |
||
| 341 | ): |
||
| 342 | 1 | if word[-len(suffix) :] == suffix: |
|
| 343 | 1 | if len(word[r2_start:]) >= len(suffix): |
|
| 344 | 1 | word = word[: -len(suffix)] |
|
| 345 | 1 | break |
|
| 346 | else: |
||
| 347 | 1 | if word[-3:] == 'ion': |
|
| 348 | 1 | if ( |
|
| 349 | len(word[r2_start:]) >= 3 |
||
| 350 | and len(word) >= 4 |
||
| 351 | and word[-4] in tuple('st') |
||
| 352 | ): |
||
| 353 | 1 | word = word[:-3] |
|
| 354 | |||
| 355 | # Step 5 |
||
| 356 | 1 | if word[-1] == 'e': |
|
| 357 | 1 | if len(word[r2_start:]) >= 1 or ( |
|
| 358 | len(word[r1_start:]) >= 1 |
||
| 359 | and not self._sb_ends_in_short_syllable(word[:-1]) |
||
| 360 | ): |
||
| 361 | 1 | word = word[:-1] |
|
| 362 | 1 | elif word[-1] == 'l': |
|
| 363 | 1 | if len(word[r2_start:]) >= 1 and word[-2] == 'l': |
|
| 364 | 1 | word = word[:-1] |
|
| 365 | |||
| 366 | # Change 'Y' back to 'y' if it survived stemming |
||
| 367 | 1 | for i in range(0, len(word)): |
|
| 368 | 1 | if word[i] == 'Y': |
|
| 369 | 1 | word = word[:i] + 'y' + word[i + 1 :] |
|
| 370 | |||
| 371 | 1 | return word |
|
| 372 | |||
| 406 |
This check looks for invalid names for a range of different identifiers.
You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.
If your project includes a Pylint configuration file, the settings contained in that file take precedence.
To find out more about Pylint, please refer to their site.