| 1 |  |  | # -*- coding: utf-8 -*- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | # Copyright 2014-2018 by Christopher C. Little. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | # This file is part of Abydos. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | # Abydos is free software: you can redistribute it and/or modify | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | # it under the terms of the GNU General Public License as published by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | # the Free Software Foundation, either version 3 of the License, or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | # (at your option) any later version. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | # Abydos is distributed in the hope that it will be useful, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | # GNU General Public License for more details. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | # You should have received a copy of the GNU General Public License | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 | 1 |  | """abydos.stemmer._snowball_dutch. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | Snowball Dutch stemmer | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 | 1 |  | from __future__ import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     absolute_import, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     division, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     print_function, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     unicode_literals, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  | ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 | 1 |  | from unicodedata import normalize | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 | 1 |  | from six import text_type | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 | 1 |  | from six.moves import range | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 | 1 |  | from ._snowball import _Snowball | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 | 1 |  | __all__ = ['SnowballDutch', 'sb_dutch'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 | 1 |  | class SnowballDutch(_Snowball): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     """Snowball Dutch stemmer. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     The Snowball Dutch stemmer is defined at: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     http://snowball.tartarus.org/algorithms/dutch/stemmer.html | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 | 1 |  |     _vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'è'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 | 1 |  |     _not_s_endings = {'a', 'e', 'i', 'j', 'o', 'u', 'y', 'è'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 | 1 |  |     _accented = dict(zip((ord(_) for _ in 'äëïöüáéíóú'), 'aeiouaeiou')) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 | 1 |  |     def _undouble(self, word): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         """Undouble endings -kk, -dd, and -tt. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         Parameters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         ---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         word : str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |           The word to stem | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |         Returns | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         ------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |             The word with doubled endings undoubled | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 | 1 |  |         if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |             len(word) > 1 | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |             and word[-1] == word[-2] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |             and word[-1] in {'d', 'k', 't'} | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 | 1 |  |             return word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 | 1 |  |         return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 | 1 |  |     def stem(self, word): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         """Return Snowball Dutch stem. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |         Parameters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |         ---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         word : str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |             The word to stem | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         Returns | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |         ------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |             Word stem | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         Examples | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         -------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         >>> stmr = SnowballDutch() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         >>> stmr.stem('lezen') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         'lez' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         >>> stmr.stem('opschorting') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         'opschort' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         >>> stmr.stem('ongrijpbaarheid') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         'ongrijp' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         # lowercase, normalize, decompose, filter umlauts & acutes out, and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         # compose | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 | 1 |  |         word = normalize('NFC', text_type(word.lower())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 | 1 |  |         word = word.translate(self._accented) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 | 1 |  |         for i in range(len(word)): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 | 1 |  |             if i == 0 and word[0] == 'y': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 | 1 |  |                 word = 'Y' + word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 | 1 |  |             elif word[i] == 'y' and word[i - 1] in self._vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 | 1 |  |                 word = word[:i] + 'Y' + word[i + 1 :] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 | 1 |  |             elif ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                 word[i] == 'i' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                 and word[i - 1] in self._vowels | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |                 and i + 1 < len(word) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |                 and word[i + 1] in self._vowels | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |             ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 | 1 |  |                 word = word[:i] + 'I' + word[i + 1 :] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 | 1 |  |         r1_start = max(3, self._sb_r1(word)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 | 1 |  |         r2_start = self._sb_r2(word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |         # Step 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 | 1 |  |         if word[-5:] == 'heden': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 | 1 |  |             if len(word[r1_start:]) >= 5: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 | 1 |  |                 word = word[:-3] + 'id' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 | 1 |  |         elif word[-3:] == 'ene': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 | 1 |  |             if len(word[r1_start:]) >= 3 and ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |                 word[-4] not in self._vowels and word[-6:-3] != 'gem' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |             ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 | 1 |  |                 word = self._undouble(word[:-3]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 | 1 |  |         elif word[-2:] == 'en': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 | 1 |  |             if len(word[r1_start:]) >= 2 and ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |                 word[-3] not in self._vowels and word[-5:-2] != 'gem' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |             ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 | 1 |  |                 word = self._undouble(word[:-2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 | 1 |  |         elif word[-2:] == 'se': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 | 1 |  |             if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |                 len(word[r1_start:]) >= 2 | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |                 and word[-3] not in self._not_s_endings | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |             ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 | 1 |  |                 word = word[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 | 1 |  |         elif word[-1:] == 's': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 | 1 |  |             if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |                 len(word[r1_start:]) >= 1 | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |                 and word[-2] not in self._not_s_endings | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |             ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 | 1 |  |                 word = word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |         # Step 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 | 1 |  |         e_removed = False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 | 1 |  |         if word[-1:] == 'e': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 | 1 |  |             if len(word[r1_start:]) >= 1 and word[-2] not in self._vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 | 1 |  |                 word = self._undouble(word[:-1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 | 1 |  |                 e_removed = True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |         # Step 3a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 | 1 |  |         if word[-4:] == 'heid': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 | 1 |  |             if len(word[r2_start:]) >= 4 and word[-5] != 'c': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 | 1 |  |                 word = word[:-4] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 | 1 |  |                 if word[-2:] == 'en': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 | 1 |  |                     if len(word[r1_start:]) >= 2 and ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |                         word[-3] not in self._vowels and word[-5:-2] != 'gem' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |                     ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 | 1 |  |                         word = self._undouble(word[:-2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |         # Step 3b | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 | 1 |  |         if word[-4:] == 'lijk': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 | 1 |  |             if len(word[r2_start:]) >= 4: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 | 1 |  |                 word = word[:-4] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |                 # Repeat step 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 | 1 |  |                 if word[-1:] == 'e': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 | 1 |  |                     if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |                         len(word[r1_start:]) >= 1 | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |                         and word[-2] not in self._vowels | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |                     ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 | 1 |  |                         word = self._undouble(word[:-1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 | 1 |  |         elif word[-4:] == 'baar': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 | 1 |  |             if len(word[r2_start:]) >= 4: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 | 1 |  |                 word = word[:-4] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 | 1 |  |         elif word[-3:] in ('end', 'ing'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 | 1 |  |             if len(word[r2_start:]) >= 3: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 | 1 |  |                 word = word[:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 | 1 |  |                 if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |                     word[-2:] == 'ig' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |                     and len(word[r2_start:]) >= 2 | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |                     and word[-3] != 'e' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |                 ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 | 1 |  |                     word = word[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 | 1 |  |                     word = self._undouble(word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 | 1 |  |         elif word[-3:] == 'bar': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 | 1 |  |             if len(word[r2_start:]) >= 3 and e_removed: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 | 1 |  |                 word = word[:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 | 1 |  |         elif word[-2:] == 'ig': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 | 1 |  |             if len(word[r2_start:]) >= 2 and word[-3] != 'e': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 | 1 |  |                 word = word[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |         # Step 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 | 1 |  |         if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |             len(word) >= 4 | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |             and word[-3] == word[-2] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |             and word[-2] in {'a', 'e', 'o', 'u'} | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |             and word[-4] not in self._vowels | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |             and word[-1] not in self._vowels | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |             and word[-1] != 'I' | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |         ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 | 1 |  |             word = word[:-2] + word[-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  |         # Change 'Y' and 'I' back to lowercase if survived stemming | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 | 1 |  |         for i in range(0, len(word)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 | 1 |  |             if word[i] == 'Y': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 | 1 |  |                 word = word[:i] + 'y' + word[i + 1 :] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 | 1 |  |             elif word[i] == 'I': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 | 1 |  |                 word = word[:i] + 'i' + word[i + 1 :] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 | 1 |  |         return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 215 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 216 | 1 |  | def sb_dutch(word): | 
            
                                                                        
                            
            
                                    
            
            
                | 217 |  |  |     """Return Snowball Dutch stem. | 
            
                                                                        
                            
            
                                    
            
            
                | 218 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 219 |  |  |     This is a wrapper for :py:meth:`SnowballDutch.stem`. | 
            
                                                                        
                            
            
                                    
            
            
                | 220 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 221 |  |  |     Parameters | 
            
                                                                        
                            
            
                                    
            
            
                | 222 |  |  |     ---------- | 
            
                                                                        
                            
            
                                    
            
            
                | 223 |  |  |     word : str | 
            
                                                                        
                            
            
                                    
            
            
                | 224 |  |  |         The word to stem | 
            
                                                                        
                            
            
                                    
            
            
                | 225 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 226 |  |  |     Returns | 
            
                                                                        
                            
            
                                    
            
            
                | 227 |  |  |     ------- | 
            
                                                                        
                            
            
                                    
            
            
                | 228 |  |  |     str | 
            
                                                                        
                            
            
                                    
            
            
                | 229 |  |  |         Word stem | 
            
                                                                        
                            
            
                                    
            
            
                | 230 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 231 |  |  |     Examples | 
            
                                                                        
                            
            
                                    
            
            
                | 232 |  |  |     -------- | 
            
                                                                        
                            
            
                                    
            
            
                | 233 |  |  |     >>> sb_dutch('lezen') | 
            
                                                                        
                            
            
                                    
            
            
                | 234 |  |  |     'lez' | 
            
                                                                        
                            
            
                                    
            
            
                | 235 |  |  |     >>> sb_dutch('opschorting') | 
            
                                                                        
                            
            
                                    
            
            
                | 236 |  |  |     'opschort' | 
            
                                                                        
                            
            
                                    
            
            
                | 237 |  |  |     >>> sb_dutch('ongrijpbaarheid') | 
            
                                                                        
                            
            
                                    
            
            
                | 238 |  |  |     'ongrijp' | 
            
                                                                        
                            
            
                                    
            
            
                | 239 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 240 |  |  |     """ | 
            
                                                                        
                            
            
                                    
            
            
                | 241 | 1 |  |     return SnowballDutch().stem(word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 242 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 243 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
# When this module is executed directly (rather than imported), run the
# doctest examples embedded in its docstrings.
if __name__ == '__main__':
    # Imported here so that doctest is only loaded when running as a script.
    import doctest

    doctest.testmod()
            
                                                        
            
                                    
            
            
                | 248 |  |  |  |