| Conditions | 116 |
| Total Lines | 233 |
| Code Lines | 176 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 172 |
| CRAP Score | 116 |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex code units like abydos.stemmer._porter.Porter.stem() often do a lot of different things. To break the enclosing class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
def stem(self, word, early_english=False):
    """Return Porter stem.

    The word is lowercased and NFC-normalized, then passed through the
    successive suffix-rewriting steps of the Porter algorithm (1a, 1b, 1c,
    2, 3, 4, 5a, 5b). Words shorter than three characters are returned
    after normalization without any stemming.

    Parameters
    ----------
    word : str
        The word to stem
    early_english : bool
        Set to True in order to remove -eth & -est (2nd & 3rd person
        singular verbal agreement suffixes)

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> stmr = Porter()
    >>> stmr.stem('reading')
    'read'
    >>> stmr.stem('suspension')
    'suspens'
    >>> stmr.stem('elusiveness')
    'elus'

    >>> stmr.stem('eateth', early_english=True)
    'eat'

    """
    # lowercase, normalize, and compose
    word = normalize('NFC', text_type(word.lower()))

    # Words shorter than 3 characters are returned unchanged
    if len(word) < 3:
        return word

    # Re-map consonantal y to Y (Y will be C, y will be V).
    # This must stay a sequential loop: each test looks at the character
    # to the left, which may itself just have been rewritten to 'Y'.
    if word[0] == 'y':
        word = 'Y' + word[1:]
    for i in range(1, len(word)):
        if word[i] == 'y' and word[i - 1] in self._vowels:
            word = word[:i] + 'Y' + word[i + 1:]

    # Step 1a: plural suffixes
    if word[-1] == 's':
        if word[-4:] == 'sses':
            word = word[:-2]
        elif word[-3:] == 'ies':
            word = word[:-2]
        elif word[-2:] == 'ss':
            pass
        else:
            word = word[:-1]

    # Step 1b: -eed / -ed / -ing (and optionally -est / -eth)
    step1b_flag = False
    if word[-3:] == 'eed':
        if self._m_degree(word[:-3]) > 0:
            word = word[:-1]
    elif word[-2:] == 'ed':
        if self._has_vowel(word[:-2]):
            word = word[:-2]
            step1b_flag = True
    elif word[-3:] == 'ing':
        if self._has_vowel(word[:-3]):
            word = word[:-3]
            step1b_flag = True
    elif early_english:
        if word[-3:] == 'est':
            if self._has_vowel(word[:-3]):
                word = word[:-3]
                step1b_flag = True
        elif word[-3:] == 'eth':
            if self._has_vowel(word[:-3]):
                word = word[:-3]
                step1b_flag = True

    # Clean-up that only applies when step 1b actually removed a suffix
    if step1b_flag:
        if word[-2:] in {'at', 'bl', 'iz'}:
            word += 'e'
        elif self._ends_in_doubled_cons(word) and word[-1] not in {
            'l',
            's',
            'z',
        }:
            word = word[:-1]
        elif self._m_degree(word) == 1 and self._ends_in_cvc(word):
            word += 'e'

    # Step 1c: terminal y -> i when the stem contains a vowel
    if word[-1] in {'Y', 'y'} and self._has_vowel(word[:-1]):
        word = word[:-1] + 'i'

    # Step 2: suffixes are grouped by their penultimate letter, so at most
    # one group of comparisons is evaluated per word
    if len(word) > 1:
        if word[-2] == 'a':
            if word[-7:] == 'ational':
                if self._m_degree(word[:-7]) > 0:
                    word = word[:-5] + 'e'
            elif word[-6:] == 'tional':
                if self._m_degree(word[:-6]) > 0:
                    word = word[:-2]
        elif word[-2] == 'c':
            if word[-4:] in {'enci', 'anci'}:
                if self._m_degree(word[:-4]) > 0:
                    word = word[:-1] + 'e'
        elif word[-2] == 'e':
            if word[-4:] == 'izer':
                if self._m_degree(word[:-4]) > 0:
                    word = word[:-1]
        elif word[-2] == 'g':
            if word[-4:] == 'logi':
                if self._m_degree(word[:-4]) > 0:
                    word = word[:-1]
        elif word[-2] == 'l':
            if word[-3:] == 'bli':
                if self._m_degree(word[:-3]) > 0:
                    word = word[:-1] + 'e'
            elif word[-4:] == 'alli':
                if self._m_degree(word[:-4]) > 0:
                    word = word[:-2]
            elif word[-5:] == 'entli':
                if self._m_degree(word[:-5]) > 0:
                    word = word[:-2]
            elif word[-3:] == 'eli':
                if self._m_degree(word[:-3]) > 0:
                    word = word[:-2]
            elif word[-5:] == 'ousli':
                if self._m_degree(word[:-5]) > 0:
                    word = word[:-2]
        elif word[-2] == 'o':
            if word[-7:] == 'ization':
                if self._m_degree(word[:-7]) > 0:
                    word = word[:-5] + 'e'
            elif word[-5:] == 'ation':
                if self._m_degree(word[:-5]) > 0:
                    word = word[:-3] + 'e'
            elif word[-4:] == 'ator':
                if self._m_degree(word[:-4]) > 0:
                    word = word[:-2] + 'e'
        elif word[-2] == 's':
            if word[-5:] == 'alism':
                if self._m_degree(word[:-5]) > 0:
                    word = word[:-3]
            elif word[-7:] in {'iveness', 'fulness', 'ousness'}:
                if self._m_degree(word[:-7]) > 0:
                    word = word[:-4]
        elif word[-2] == 't':
            if word[-5:] == 'aliti':
                if self._m_degree(word[:-5]) > 0:
                    word = word[:-3]
            elif word[-5:] == 'iviti':
                if self._m_degree(word[:-5]) > 0:
                    word = word[:-3] + 'e'
            elif word[-6:] == 'biliti':
                if self._m_degree(word[:-6]) > 0:
                    word = word[:-5] + 'le'

    # Step 3
    # Equality, not substring membership: `x in 'icate'` would also be true
    # for short words such as 'cat' or 'ate' that are substrings of 'icate'.
    if word[-5:] == 'icate':
        if self._m_degree(word[:-5]) > 0:
            word = word[:-3]
    elif word[-5:] == 'ative':
        if self._m_degree(word[:-5]) > 0:
            word = word[:-5]
    elif word[-5:] in {'alize', 'iciti'}:
        if self._m_degree(word[:-5]) > 0:
            word = word[:-3]
    elif word[-4:] == 'ical':
        if self._m_degree(word[:-4]) > 0:
            word = word[:-2]
    elif word[-3:] == 'ful':
        if self._m_degree(word[:-3]) > 0:
            word = word[:-3]
    elif word[-4:] == 'ness':
        if self._m_degree(word[:-4]) > 0:
            word = word[:-4]

    # Step 4
    if word[-2:] == 'al':
        if self._m_degree(word[:-2]) > 1:
            word = word[:-2]
    elif word[-4:] in {'ance', 'ence'}:
        if self._m_degree(word[:-4]) > 1:
            word = word[:-4]
    elif word[-2:] in {'er', 'ic'}:
        if self._m_degree(word[:-2]) > 1:
            word = word[:-2]
    elif word[-4:] in {'able', 'ible'}:
        if self._m_degree(word[:-4]) > 1:
            word = word[:-4]
    elif word[-3:] == 'ant':
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]
    elif word[-5:] == 'ement':
        if self._m_degree(word[:-5]) > 1:
            word = word[:-5]
    elif word[-4:] == 'ment':
        if self._m_degree(word[:-4]) > 1:
            word = word[:-4]
    elif word[-3:] == 'ent':
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]
    elif word[-4:] in {'sion', 'tion'}:
        # Only 'ion' is removed; the preceding 's'/'t' stays in the stem
        # and is therefore included in the measured prefix.
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]
    elif word[-2:] == 'ou':
        if self._m_degree(word[:-2]) > 1:
            word = word[:-2]
    elif word[-3:] in {'ism', 'ate', 'iti', 'ous', 'ive', 'ize'}:
        if self._m_degree(word[:-3]) > 1:
            word = word[:-3]

    # Step 5a: drop a final 'e'
    if word[-1] == 'e':
        if self._m_degree(word[:-1]) > 1:
            word = word[:-1]
        elif self._m_degree(word[:-1]) == 1 and not self._ends_in_cvc(
            word[:-1]
        ):
            word = word[:-1]

    # Step 5b: reduce a final double 'l'
    if word[-2:] == 'll' and self._m_degree(word) > 1:
        word = word[:-1]

    # Change 'Y' back to 'y' if it survived stemming. The word was
    # lowercased on entry, so every 'Y' present is one introduced above.
    word = word.replace('Y', 'y')

    return word
|
| 372 | |||
| 412 |