| Metric | Value |
| --- | --- |
| Conditions | 14 |
| Total Lines | 124 |
| Code Lines | 71 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 36 |
| CRAP Score | 14 |
| Changes | 0 |
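For context, the CRAP score relates cyclomatic complexity to test coverage. Assuming the Conditions row above reports this method's cyclomatic complexity and the method is fully covered by the 36 tests, a score of 14 is exactly what the standard formula yields:

$$\mathrm{CRAP}(m) = \mathrm{comp}(m)^2 \cdot \left(1 - \frac{\mathrm{cov}(m)}{100}\right)^3 + \mathrm{comp}(m) = 14^2 \cdot (1 - 1)^3 + 14 = 14$$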
Small methods make your code easier to understand, particularly when combined with a good name. Moreover, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a good sign that you should extract the commented part into a new method and use the comment as a starting point when coming up with a good name for this new method.
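A minimal sketch of that Extract Method step (the function and helper names below are hypothetical, not taken from abydos):

```python
# Before: a comment labels an anonymous block inside a longer method.
def stem(word: str) -> str:
    # normalize case and strip non-letter characters
    word = word.lower()
    word = ''.join(c for c in word if c.isalpha())
    return word


# After: the commented block becomes a method whose name is the comment.
def _normalize_word(word: str) -> str:
    """Normalize case and strip non-letter characters."""
    return ''.join(c for c in word.lower() if c.isalpha())


def stem(word: str) -> str:
    word = _normalize_word(word)
    return word
```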
Commonly applied refactorings include:

- Extract Method

If many parameters or temporary variables are present, the following also help (a short sketch follows this list):

- Replace Temp with Query
- Introduce Parameter Object
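For instance, a minimal Replace Temp with Query sketch (hypothetical names, unrelated to the abydos code base):

```python
# Before: a temporary variable holds an intermediate result.
def price_with_tax(quantity: int, unit_price: float, tax_rate: float) -> float:
    base_price = quantity * unit_price  # temp
    return base_price + base_price * tax_rate


# After: the temp becomes a small query function, so the original
# method shrinks and carries less local state.
def _base_price(quantity: int, unit_price: float) -> float:
    return quantity * unit_price


def price_with_tax(quantity: int, unit_price: float, tax_rate: float) -> float:
    return _base_price(quantity, unit_price) * (1 + tax_rate)
```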
Complex methods like abydos.stemmer._schinke.Schinke.stem_dict() often do a lot of different things, and the class that contains them accumulates responsibilities. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
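A minimal Extract Class sketch (the classes and fields below are hypothetical, not from abydos):

```python
# Before: fields sharing the 'address_' prefix hint at a hidden concept.
class Customer:
    def __init__(self, name: str, address_street: str, address_city: str) -> None:
        self.name = name
        self.address_street = address_street
        self.address_city = address_city


# After: Extract Class moves the cohesive fields into their own class.
class Address:
    def __init__(self, street: str, city: str) -> None:
        self.street = street
        self.city = city


class Customer:
    def __init__(self, name: str, address: Address) -> None:
        self.name = name
        self.address = address
```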
The flagged method, shown with the imports it uses from the enclosing module:

```python
# Copyright 2014-2020 by Christopher C. Little.

from typing import Dict
from unicodedata import normalize


def stem_dict(self, word: str) -> Dict[str, str]:
    """Return the stem of a word according to the Schinke stemmer.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    dict
        Word stems in a dictionary

    Examples
    --------
    >>> stmr = Schinke()
    >>> stmr.stem_dict('atque')
    {'n': 'atque', 'v': 'atque'}
    >>> stmr.stem_dict('census')
    {'n': 'cens', 'v': 'censu'}
    >>> stmr.stem_dict('virum')
    {'n': 'uir', 'v': 'uiru'}
    >>> stmr.stem_dict('populusque')
    {'n': 'popul', 'v': 'populu'}
    >>> stmr.stem_dict('senatus')
    {'n': 'senat', 'v': 'senatu'}


    .. versionadded:: 0.6.0

    """
    # Lowercase, decompose, and keep only ASCII letters
    word = normalize('NFKD', word.lower())
    word = ''.join(
        c
        for c in word
        if c in {
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        }
    )

    # Rule 2
    word = word.replace('j', 'i').replace('v', 'u')

    # Rule 3
    if word[-3:] == 'que':
        # This diverges from the paper by also returning 'que' itself
        # unstemmed
        if word[:-3] in self._keep_que or word == 'que':
            return {'n': word, 'v': word}
        else:
            word = word[:-3]

    # Base case will mean returning the words as is
    noun = word
    verb = word

    # Rule 4
    for endlen in range(4, 0, -1):
        if word[-endlen:] in self._n_endings[endlen]:
            if len(word) - 2 >= endlen:
                noun = word[:-endlen]
            else:
                noun = word
            break

    for endlen in range(6, 0, -1):
        if word[-endlen:] in self._v_endings_strip[endlen]:
            if len(word) - 2 >= endlen:
                verb = word[:-endlen]
            else:
                verb = word
            break
        if word[-endlen:] in self._v_endings_alter[endlen]:
            if word[-endlen:] in {'iuntur', 'erunt', 'untur', 'iunt', 'unt'}:
                new_word = word[:-endlen] + 'i'
                addlen = 1
            elif word[-endlen:] in {'beris', 'bor', 'bo'}:
                new_word = word[:-endlen] + 'bi'
                addlen = 2
            else:
                new_word = word[:-endlen] + 'eri'
                addlen = 3

            # Technically this diverges from the paper by considering the
            # length of the stem without the new suffix
            if len(new_word) >= 2 + addlen:
                verb = new_word
            else:
                verb = word
            break

    return {'n': noun, 'v': verb}
```
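Applying the Extract Method advice above to this method is one way to bring the condition count down. The sketch below is hypothetical (the helper names and the toy endings table are not part of abydos); it pulls the noun half of Rule 4 into a named, independently testable function:

```python
from typing import Dict, Set


def _strip_if_stem_long_enough(word: str, endlen: int) -> str:
    # Rule 4 keeps the shortened stem only if at least two characters remain.
    return word[:-endlen] if len(word) - 2 >= endlen else word


def stem_noun(word: str, n_endings: Dict[int, Set[str]]) -> str:
    # Longest-match-first search over the noun-endings table,
    # mirroring the first Rule 4 loop in stem_dict().
    for endlen in range(4, 0, -1):
        if word[-endlen:] in n_endings.get(endlen, set()):
            return _strip_if_stem_long_enough(word, endlen)
    return word


# Toy usage with a made-up endings table:
print(stem_noun('senatus', {2: {'us'}}))  # -> 'senat'
```

The verb half of Rule 4 and the 'que' handling of Rule 3 could be extracted the same way, leaving stem_dict() as a short sequence of well-named steps.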