Conditions | 60 |
Total Lines | 144 |
Code Lines | 97 |
Lines | 0 |
Ratio | 0 % |
Tests | 81 |
CRAP Score | 60 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex methods like abydos.stemmer._snowball_german.SnowballGerman.stem() often do a lot of different things, which is usually a sign that the enclosing class does too. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # Copyright 2014-2020 by Christopher C. Little. |
||
def stem(self, word: str) -> str:
    """Return Snowball German stem.

    The word is lowercased and NFC-normalized, 'ß' is expanded to 'ss',
    and any 'u' or 'y' standing between two vowels is temporarily
    upper-cased so that it is not treated as a vowel when the regions are
    computed. Three suffix-stripping steps follow; finally the protected
    letters are lower-cased again and umlauts are reduced to their base
    vowels.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> stmr = SnowballGerman()
    >>> stmr.stem('lesen')
    'les'
    >>> stmr.stem('graues')
    'grau'
    >>> stmr.stem('buchstabieren')
    'buchstabi'


    .. versionadded:: 0.1.0
    .. versionchanged:: 0.3.6
        Encapsulated in class

    """
    # lowercase, normalize, and compose
    word = normalize('NFC', word.lower())
    word = word.replace('ß', 'ss')

    # Upper-case 'u'/'y' between vowels. The scan is left-to-right and
    # in place, so a letter protected on one iteration is already seen as
    # a non-vowel by the next one.
    chars = list(word)
    for i in range(2, len(chars)):
        if chars[i] in self._vowels and chars[i - 2] in self._vowels:
            if chars[i - 1] == 'u':
                chars[i - 1] = 'U'
            elif chars[i - 1] == 'y':
                chars[i - 1] = 'Y'
    word = ''.join(chars)

    if self._alternate_vowels:
        word = word.replace('ae', 'ä')
        word = word.replace('oe', 'ö')
        # Shield 'que' so that its 'ue' is not turned into 'ü'.
        word = word.replace('que', 'Q')
        word = word.replace('ue', 'ü')
        word = word.replace('Q', 'que')

    # Region start indices are computed once, before any suffix is
    # removed. r1_start is clamped to a minimum of 3.
    r1_start = max(3, self._sb_r1(word))
    r2_start = self._sb_r2(word)

    # "The suffix of length k lies inside the region" is tested as
    # len(word) - start >= k, which equals len(word[start:]) >= k for
    # k >= 1 without building a throwaway slice.

    # Step 1
    niss_flag = False
    if word.endswith('ern'):
        if len(word) - r1_start >= 3:
            word = word[:-3]
    elif word.endswith('em'):
        if len(word) - r1_start >= 2:
            word = word[:-2]
    elif word.endswith('er'):
        if len(word) - r1_start >= 2:
            word = word[:-2]
    elif word.endswith('en'):
        if len(word) - r1_start >= 2:
            word = word[:-2]
            niss_flag = True
    elif word.endswith('es'):
        if len(word) - r1_start >= 2:
            word = word[:-2]
            niss_flag = True
    elif word.endswith('e'):
        if len(word) - r1_start >= 1:
            word = word[:-1]
            niss_flag = True
    elif word.endswith('s'):
        if (
            len(word) - r1_start >= 1
            and len(word) >= 2
            and word[-2] in self._s_endings
        ):
            word = word[:-1]

    # After removing 'en', 'es' or 'e', a trailing 'niss' loses its
    # final 's'.
    if niss_flag and word.endswith('niss'):
        word = word[:-1]

    # Step 2
    if word.endswith('est'):
        if len(word) - r1_start >= 3:
            word = word[:-3]
    elif word.endswith('en'):
        if len(word) - r1_start >= 2:
            word = word[:-2]
    elif word.endswith('er'):
        if len(word) - r1_start >= 2:
            word = word[:-2]
    elif word.endswith('st'):
        # 'st' is only removed after a valid st-ending letter and when
        # the word is at least six characters long.
        if (
            len(word) - r1_start >= 2
            and len(word) >= 6
            and word[-3] in self._st_endings
        ):
            word = word[:-2]

    # Step 3
    if word.endswith('isch'):
        # The region test short-circuits before word[-5] is indexed.
        if len(word) - r2_start >= 4 and word[-5] != 'e':
            word = word[:-4]
    elif word.endswith(('lich', 'heit')):
        if len(word) - r2_start >= 4:
            word = word[:-4]
            if word.endswith(('er', 'en')) and len(word) - r1_start >= 2:
                word = word[:-2]
    elif word.endswith('keit'):
        if len(word) - r2_start >= 4:
            word = word[:-4]
            if word.endswith('lich') and len(word) - r2_start >= 4:
                word = word[:-4]
            elif word.endswith('ig') and len(word) - r2_start >= 2:
                word = word[:-2]
    elif word.endswith(('end', 'ung')):
        if len(word) - r2_start >= 3:
            word = word[:-3]
            if (
                word.endswith('ig')
                and len(word) - r2_start >= 2
                and word[-3] != 'e'
            ):
                word = word[:-2]
    elif word.endswith(('ig', 'ik')):
        if len(word) - r2_start >= 2 and word[-3] != 'e':
            word = word[:-2]

    # Change 'Y' and 'U' back to lowercase if they survived stemming and
    # remove umlauts, in a single pass over the word.
    return word.translate(str.maketrans('YUäöü', 'yuaou'))
|
200 | 1 | ||
206 |