Conditions | 60 |
Total Lines | 141 |
Code Lines | 97 |
Lines | 0 |
Ratio | 0 % |
Tests | 88 |
CRAP Score | 60 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex classes like abydos.stemmer._snowball_german.SnowballGerman (the class that contains the stem() method shown below) often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def stem(self, word, alternate_vowels=False):
    """Return Snowball German stem.

    The word is lowercased, NFC-normalized and has 'ß' expanded to 'ss'.
    Suffixes are then stripped in three steps, each removal being allowed
    only when the suffix lies inside the R1 or R2 region computed by
    ``self._sb_r1`` / ``self._sb_r2``. Finally the temporary 'U'/'Y' markers
    are lowercased again and umlauts are replaced by their base vowels.

    Parameters
    ----------
    word : str
        The word to stem
    alternate_vowels : bool
        Composes ae as ä, oe as ö, and ue as ü before running the algorithm
        (a 'ue' that is part of 'que' is left untouched)

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> stmr = SnowballGerman()
    >>> stmr.stem('lesen')
    'les'
    >>> stmr.stem('graues')
    'grau'
    >>> stmr.stem('buchstabieren')
    'buchstabi'

    """
    # lowercase, normalize, and compose
    word = normalize('NFC', word.lower())
    word = word.replace('ß', 'ss')

    # Mark 'u' and 'y' standing between two vowels by uppercasing them.
    # The scan runs left to right on the already-updated characters, so a
    # freshly written 'U'/'Y' is seen by the following membership tests.
    chars = list(word)
    for i in range(2, len(chars)):
        if (
            chars[i] in self._vowels
            and chars[i - 2] in self._vowels
            and chars[i - 1] in ('u', 'y')
        ):
            chars[i - 1] = chars[i - 1].upper()
    word = ''.join(chars)

    if alternate_vowels:
        word = word.replace('ae', 'ä')
        word = word.replace('oe', 'ö')
        # Shield 'que' behind a placeholder so its 'ue' is not composed.
        word = word.replace('que', 'Q')
        word = word.replace('ue', 'ü')
        word = word.replace('Q', 'que')

    # Region starts are computed once, before any suffix is removed.
    r1_start = max(3, self._sb_r1(word))
    r2_start = self._sb_r2(word)

    # Step 1
    # Only the longest matching suffix is considered; if it is not inside
    # R1 nothing is removed in this step.
    niss_flag = False
    if word.endswith('ern'):
        if len(word[r1_start:]) >= 3:
            word = word[:-3]
    elif word.endswith(('em', 'er')):
        if len(word[r1_start:]) >= 2:
            word = word[:-2]
    elif word.endswith(('en', 'es')):
        if len(word[r1_start:]) >= 2:
            word = word[:-2]
            niss_flag = True
    elif word.endswith('e'):
        if len(word[r1_start:]) >= 1:
            word = word[:-1]
            niss_flag = True
    elif word.endswith('s'):
        if (
            len(word[r1_start:]) >= 1
            and len(word) >= 2
            and word[-2] in self._s_endings
        ):
            word = word[:-1]

    # After removing e/en/es, a remaining 'niss' is shortened to 'nis'.
    if niss_flag and word.endswith('niss'):
        word = word[:-1]

    # Step 2
    if word.endswith('est'):
        if len(word[r1_start:]) >= 3:
            word = word[:-3]
    elif word.endswith(('en', 'er')):
        if len(word[r1_start:]) >= 2:
            word = word[:-2]
    elif word.endswith('st'):
        # 'st' needs a valid preceding letter and at least 6 characters.
        if (
            len(word[r1_start:]) >= 2
            and len(word) >= 6
            and word[-3] in self._st_endings
        ):
            word = word[:-2]

    # Step 3
    # Slices (word[-5:-4], word[-3:-2]) are used for the "not preceded by
    # 'e'" checks so a too-short word can never raise IndexError.
    if word.endswith('isch'):
        if len(word[r2_start:]) >= 4 and word[-5:-4] != 'e':
            word = word[:-4]
    elif word.endswith(('lich', 'heit')):
        if len(word[r2_start:]) >= 4:
            word = word[:-4]
            if word.endswith(('er', 'en')) and len(word[r1_start:]) >= 2:
                word = word[:-2]
    elif word.endswith('keit'):
        if len(word[r2_start:]) >= 4:
            word = word[:-4]
            if word.endswith('lich') and len(word[r2_start:]) >= 4:
                word = word[:-4]
            elif word.endswith('ig') and len(word[r2_start:]) >= 2:
                word = word[:-2]
    elif word.endswith(('end', 'ung')):
        if len(word[r2_start:]) >= 3:
            word = word[:-3]
            if (
                word.endswith('ig')
                and len(word[r2_start:]) >= 2
                and word[-3:-2] != 'e'
            ):
                word = word[:-2]
    elif word.endswith(('ig', 'ik')):
        if len(word[r2_start:]) >= 2 and word[-3:-2] != 'e':
            word = word[:-2]

    # Change 'Y' and 'U' back to lowercase if survived stemming
    word = word.replace('Y', 'y').replace('U', 'u')

    # Remove umlauts
    _umlauts = {ord('ä'): 'a', ord('ö'): 'o', ord('ü'): 'u'}
    word = word.translate(_umlauts)

    return word
192 | |||
228 |