| Conditions | 60 |
| Total Lines | 136 |
| Code Lines | 97 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 88 |
| CRAP Score | 60 |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex methods like abydos.stemmer._snowball_german.SnowballGerman.stem() often do a lot of different things, which usually means the enclosing class does too. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
def stem(self, word, alternate_vowels=False):
    """Return Snowball German stem.

    The word is lowercased, NFC-normalized, and 'ß' is expanded to 'ss'.
    Any u or y standing between two vowels is then marked as 'U'/'Y', the
    optional vowel composition is applied, and three suffix-stripping
    steps run. Finally the markers are lowercased again and umlauts are
    replaced by their base vowels.

    Args:
        word (str): The word to stem
        alternate_vowels (bool): If True, composes ae as ä, oe as ö, and
            ue as ü (leaving 'que' untouched). This happens after u/y
            between vowels have been marked and before the suffix steps
            run.

    Returns:
        str: Word stem

    Examples:
        >>> stmr = SnowballGerman()
        >>> stmr.stem('lesen')
        'les'
        >>> stmr.stem('graues')
        'grau'
        >>> stmr.stem('buchstabieren')
        'buchstabi'

    """

    def _fits(text, region_start, suffix_len):
        """Return True if text[region_start:] holds suffix_len characters."""
        return len(text[region_start:]) >= suffix_len

    # lowercase, normalize, and compose
    word = normalize('NFC', word.lower())
    word = word.replace('ß', 'ss')

    # Mark u/y that sit between two vowels as 'U'/'Y'. The scan runs left
    # to right on a mutable list, so a marker written at position i - 1 is
    # what the next iterations see (same as rebuilding the string).
    vowels = self._vowels
    chars = list(word)
    for i in range(2, len(chars)):
        if chars[i] in vowels and chars[i - 2] in vowels:
            if chars[i - 1] == 'u':
                chars[i - 1] = 'U'
            elif chars[i - 1] == 'y':
                chars[i - 1] = 'Y'
    word = ''.join(chars)

    if alternate_vowels:
        word = word.replace('ae', 'ä')
        word = word.replace('oe', 'ö')
        # 'Q' temporarily shields 'que' from the ue -> ü replacement.
        word = word.replace('que', 'Q')
        word = word.replace('ue', 'ü')
        word = word.replace('Q', 'que')

    # R1 is never allowed to start before the fourth character.
    r1_start = max(3, self._sb_r1(word))
    r2_start = self._sb_r2(word)

    # Step 1
    # The first listed suffix that matches is the only candidate, even if
    # it then fails the R1 test. The flag marks suffixes whose removal may
    # expose a trailing 'niss' that has to be shortened to 'nis'.
    for suffix, check_niss in (
        ('ern', False),
        ('em', False),
        ('er', False),
        ('en', True),
        ('es', True),
        ('e', True),
    ):
        if word.endswith(suffix):
            if _fits(word, r1_start, len(suffix)):
                word = word[: -len(suffix)]
                if check_niss and word.endswith('niss'):
                    word = word[:-1]
            break
    else:
        # No suffix above matched: a final 's' goes only after a valid
        # s-ending letter.
        if (
            word.endswith('s')
            and _fits(word, r1_start, 1)
            and len(word) >= 2
            and word[-2] in self._s_endings
        ):
            word = word[:-1]

    # Step 2
    for suffix in ('est', 'en', 'er'):
        if word.endswith(suffix):
            if _fits(word, r1_start, len(suffix)):
                word = word[: -len(suffix)]
            break
    else:
        # 'st' is removed only after a valid st-ending letter and when the
        # word is at least six characters long.
        if (
            word.endswith('st')
            and _fits(word, r1_start, 2)
            and len(word) >= 6
            and word[-3] in self._st_endings
        ):
            word = word[:-2]

    # Step 3
    if word.endswith('isch'):
        if _fits(word, r2_start, 4) and word[-5] != 'e':
            word = word[:-4]
    elif word.endswith(('lich', 'heit')):
        if _fits(word, r2_start, 4):
            word = word[:-4]
            if word.endswith(('er', 'en')) and _fits(word, r1_start, 2):
                word = word[:-2]
    elif word.endswith('keit'):
        if _fits(word, r2_start, 4):
            word = word[:-4]
            if word.endswith('lich') and _fits(word, r2_start, 4):
                word = word[:-4]
            elif word.endswith('ig') and _fits(word, r2_start, 2):
                word = word[:-2]
    elif word.endswith(('end', 'ung')):
        if _fits(word, r2_start, 3):
            word = word[:-3]
            if (
                word.endswith('ig')
                and _fits(word, r2_start, 2)
                and word[-3] != 'e'
            ):
                word = word[:-2]
    elif word.endswith(('ig', 'ik')):
        if _fits(word, r2_start, 2) and word[-3] != 'e':
            word = word[:-2]

    # Change 'Y' and 'U' back to lowercase if they survived stemming and
    # remove umlauts, all in a single pass over the word.
    return word.translate(str.maketrans('YUäöü', 'yuaou'))
|
| 187 | |||
| 218 |