| Total Complexity | 41 | 
| Total Lines | 556 | 
| Duplicated Lines | 7.55 % | 
| Changes | 0 | ||
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like tests.stemmer.test_stemmer_snowball often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*-  | 
            ||
| 2 | |||
| 3 | # Copyright 2014-2018 by Christopher C. Little.  | 
            ||
| 4 | # This file is part of Abydos.  | 
            ||
| 5 | #  | 
            ||
| 6 | # Abydos is free software: you can redistribute it and/or modify  | 
            ||
| 7 | # it under the terms of the GNU General Public License as published by  | 
            ||
| 8 | # the Free Software Foundation, either version 3 of the License, or  | 
            ||
| 9 | # (at your option) any later version.  | 
            ||
| 10 | #  | 
            ||
| 11 | # Abydos is distributed in the hope that it will be useful,  | 
            ||
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
            ||
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  | 
            ||
| 14 | # GNU General Public License for more details.  | 
            ||
| 15 | #  | 
            ||
| 16 | # You should have received a copy of the GNU General Public License  | 
            ||
| 17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>.  | 
            ||
| 18 | |||
| 19 | """abydos.tests.test_stemmer_snowball.  | 
            ||
| 20 | |||
| 21 | This module contains unit tests for abydos.stemmer.snowball  | 
            ||
| 22 | """  | 
            ||
| 23 | |||
| 24 | from __future__ import unicode_literals  | 
            ||
| 25 | |||
| 26 | import codecs  | 
            ||
| 27 | import unittest  | 
            ||
| 28 | |||
| 29 | from abydos.stemmer.snowball import _ends_in_cvc, _ends_in_doubled_cons, \  | 
            ||
| 30 | _m_degree, _sb_ends_in_short_syllable, _sb_has_vowel, _sb_r1, _sb_r2, \  | 
            ||
| 31 | _sb_short_word, porter, porter2, sb_danish, sb_dutch, sb_german, \  | 
            ||
| 32 | sb_norwegian, sb_swedish  | 
            ||
| 33 | |||
| 34 | from .. import _corpus_file  | 
            ||
| 35 | |||
| 36 | |||
class PorterTestCases(unittest.TestCase):
    """Test Porter functions.

    abydos.stemmer._m_degree, abydos.stemmer.porter,
    abydos.stemmer._sb_has_vowel, abydos.stemmer._ends_in_doubled_cons,
    & abydos.stemmer._ends_in_cvc
    """

    def test_m_degree(self):
        """Test abydos.stemmer._m_degree."""
        _vowels = set('aeiouy')
        # base case
        self.assertEqual(_m_degree('', _vowels), 0)

        # m==0
        self.assertEqual(_m_degree('tr', _vowels), 0)
        self.assertEqual(_m_degree('ee', _vowels), 0)
        self.assertEqual(_m_degree('tree', _vowels), 0)
        self.assertEqual(_m_degree('y', _vowels), 0)
        self.assertEqual(_m_degree('by', _vowels), 0)

        # m==1
        self.assertEqual(_m_degree('trouble', _vowels), 1)
        self.assertEqual(_m_degree('oats', _vowels), 1)
        self.assertEqual(_m_degree('trees', _vowels), 1)
        self.assertEqual(_m_degree('ivy', _vowels), 1)

        # m==2
        self.assertEqual(_m_degree('troubles', _vowels), 2)
        self.assertEqual(_m_degree('private', _vowels), 2)
        self.assertEqual(_m_degree('oaten', _vowels), 2)
        self.assertEqual(_m_degree('orrery', _vowels), 2)

    def test_has_vowel(self):
        """Test abydos.stemmer._sb_has_vowel."""
        _vowels = set('aeiouy')
        # base case
        self.assertFalse(_sb_has_vowel('', _vowels))

        # False cases (upper-case 'Y' is not in the vowel set)
        self.assertFalse(_sb_has_vowel('b', _vowels))
        self.assertFalse(_sb_has_vowel('c', _vowels))
        self.assertFalse(_sb_has_vowel('bc', _vowels))
        self.assertFalse(_sb_has_vowel('bcdfghjklmnpqrstvwxYz', _vowels))
        self.assertFalse(_sb_has_vowel('Y', _vowels))

        # True cases
        self.assertTrue(_sb_has_vowel('a', _vowels))
        self.assertTrue(_sb_has_vowel('e', _vowels))
        self.assertTrue(_sb_has_vowel('ae', _vowels))
        self.assertTrue(_sb_has_vowel('aeiouy', _vowels))
        self.assertTrue(_sb_has_vowel('y', _vowels))

        self.assertTrue(_sb_has_vowel('ade', _vowels))
        self.assertTrue(_sb_has_vowel('cad', _vowels))
        self.assertTrue(_sb_has_vowel('add', _vowels))
        self.assertTrue(_sb_has_vowel('phi', _vowels))
        self.assertTrue(_sb_has_vowel('pfy', _vowels))

        self.assertFalse(_sb_has_vowel('pfY', _vowels))

    def test_ends_in_doubled_cons(self):
        """Test abydos.stemmer._ends_in_doubled_cons."""
        _vowels = set('aeiouy')
        # base case
        self.assertFalse(_ends_in_doubled_cons('', _vowels))

        # False cases
        self.assertFalse(_ends_in_doubled_cons('b', _vowels))
        self.assertFalse(_ends_in_doubled_cons('c', _vowels))
        self.assertFalse(_ends_in_doubled_cons('bc', _vowels))
        self.assertFalse(_ends_in_doubled_cons('bcdfghjklmnpqrstvwxYz',
                                               _vowels))
        self.assertFalse(_ends_in_doubled_cons('Y', _vowels))
        self.assertFalse(_ends_in_doubled_cons('a', _vowels))
        self.assertFalse(_ends_in_doubled_cons('e', _vowels))
        self.assertFalse(_ends_in_doubled_cons('ae', _vowels))
        self.assertFalse(_ends_in_doubled_cons('aeiouy', _vowels))
        self.assertFalse(_ends_in_doubled_cons('y', _vowels))
        self.assertFalse(_ends_in_doubled_cons('ade', _vowels))
        self.assertFalse(_ends_in_doubled_cons('cad', _vowels))
        self.assertFalse(_ends_in_doubled_cons('phi', _vowels))
        self.assertFalse(_ends_in_doubled_cons('pfy', _vowels))
        self.assertFalse(_ends_in_doubled_cons('faddy', _vowels))
        self.assertFalse(_ends_in_doubled_cons('aiii', _vowels))
        self.assertFalse(_ends_in_doubled_cons('ayyy', _vowels))

        # True cases
        self.assertTrue(_ends_in_doubled_cons('add', _vowels))
        self.assertTrue(_ends_in_doubled_cons('fadd', _vowels))
        self.assertTrue(_ends_in_doubled_cons('fadddd', _vowels))
        self.assertTrue(_ends_in_doubled_cons('raYY', _vowels))
        self.assertTrue(_ends_in_doubled_cons('doll', _vowels))
        self.assertTrue(_ends_in_doubled_cons('parr', _vowels))
        self.assertTrue(_ends_in_doubled_cons('parrr', _vowels))
        self.assertTrue(_ends_in_doubled_cons('bacc', _vowels))

    def test_ends_in_cvc(self):
        """Test abydos.stemmer._ends_in_cvc."""
        _vowels = set('aeiouy')
        # base case
        self.assertFalse(_ends_in_cvc('', _vowels))

        # False cases
        self.assertFalse(_ends_in_cvc('b', _vowels))
        self.assertFalse(_ends_in_cvc('c', _vowels))
        self.assertFalse(_ends_in_cvc('bc', _vowels))
        self.assertFalse(_ends_in_cvc('bcdfghjklmnpqrstvwxYz', _vowels))
        self.assertFalse(_ends_in_cvc('YYY', _vowels))
        self.assertFalse(_ends_in_cvc('ddd', _vowels))
        self.assertFalse(_ends_in_cvc('faaf', _vowels))
        self.assertFalse(_ends_in_cvc('rare', _vowels))
        self.assertFalse(_ends_in_cvc('rhy', _vowels))

        # True cases
        self.assertTrue(_ends_in_cvc('dad', _vowels))
        self.assertTrue(_ends_in_cvc('phad', _vowels))
        self.assertTrue(_ends_in_cvc('faded', _vowels))
        self.assertTrue(_ends_in_cvc('maYor', _vowels))
        self.assertTrue(_ends_in_cvc('enlil', _vowels))
        self.assertTrue(_ends_in_cvc('parer', _vowels))
        self.assertTrue(_ends_in_cvc('padres', _vowels))
        self.assertTrue(_ends_in_cvc('bacyc', _vowels))

        # Special case for W, X, & Y
        self.assertFalse(_ends_in_cvc('craw', _vowels))
        self.assertFalse(_ends_in_cvc('max', _vowels))
        self.assertFalse(_ends_in_cvc('cray', _vowels))

    def test_porter(self):
        """Test abydos.stemmer.porter."""
        # base case
        self.assertEqual(porter(''), '')

        # simple cases
        self.assertEqual(porter('c'), 'c')
        self.assertEqual(porter('da'), 'da')
        self.assertEqual(porter('ad'), 'ad')
        self.assertEqual(porter('sing'), 'sing')
        self.assertEqual(porter('singing'), 'sing')

        # missed branch test cases
        self.assertEqual(porter('capitalism'), 'capit')
        self.assertEqual(porter('fatalism'), 'fatal')
        self.assertEqual(porter('stional'), 'stional')
        self.assertEqual(porter('palism'), 'palism')
        self.assertEqual(porter('sization'), 'sizat')
        self.assertEqual(porter('licated'), 'licat')
        self.assertEqual(porter('lical'), 'lical')

    def test_porter_early_english(self):
        """Test abydos.stemmer.porter (early English)."""
        # base case
        self.assertEqual(porter('', early_english=True), '')

        # simple cases (no different from regular stemmer)
        self.assertEqual(porter('c', early_english=True), 'c')
        self.assertEqual(porter('da', early_english=True), 'da')
        self.assertEqual(porter('ad', early_english=True), 'ad')
        self.assertEqual(porter('sing', early_english=True), 'sing')
        self.assertEqual(porter('singing', early_english=True), 'sing')

        # make
        self.assertEqual(porter('make', early_english=True), 'make')
        self.assertEqual(porter('makes', early_english=True), 'make')
        self.assertEqual(porter('maketh', early_english=True), 'make')
        self.assertEqual(porter('makest', early_english=True), 'make')

        # say
        self.assertEqual(porter('say', early_english=True), 'sai')
        self.assertEqual(porter('says', early_english=True), 'sai')
        self.assertEqual(porter('sayeth', early_english=True), 'sai')
        self.assertEqual(porter('sayest', early_english=True), 'sai')

        # missed branch test cases
        self.assertEqual(porter('best', early_english=True), 'best')
        self.assertEqual(porter('meth', early_english=True), 'meth')

    def test_porter_snowball(self):
        """Test abydos.stemmer.porter (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/porter/diffs.txt
        """
        # Snowball Porter test set
        # Decode explicitly as UTF-8 (like the other corpus tests in this
        # module) so the result does not depend on the platform's locale.
        with codecs.open(_corpus_file('snowball_porter.csv'),
                         encoding='utf-8') as snowball_ts:
            next(snowball_ts)  # skip the first line of the file
            for line in snowball_ts:
                # lines starting with '#' are not test cases
                if line[0] != '#':
                    line = line.strip().split(',')
                    word, stem = line[0], line[1]
                    self.assertEqual(porter(word), stem.lower())
| 229 | |||
| 230 | |||
class Porter2TestCases(unittest.TestCase):
    """Test Porter2 functions.

    abydos.stemmer._sb_r1, abydos.stemmer._sb_r2,
    abydos.stemmer._sb_ends_in_short_syllable, abydos.stemmer._sb_short_word,
    & abydos.stemmer.porter2
    """

    def test_sb_r1(self):
        """Test abydos.stemmer._sb_r1."""
        _vowels = set('aeiouy')
        # base case
        self.assertEqual(_sb_r1('', _vowels), 0)

        # examples from http://snowball.tartarus.org/texts/r1r2.html
        self.assertEqual(_sb_r1('beautiful', _vowels), 5)
        self.assertEqual(_sb_r1('beauty', _vowels), 5)
        self.assertEqual(_sb_r1('beau', _vowels), 4)
        self.assertEqual(_sb_r1('animadversion', _vowels), 2)
        self.assertEqual(_sb_r1('sprinkled', _vowels), 5)
        self.assertEqual(_sb_r1('eucharist', _vowels), 3)

    def test_sb_r2(self):
        """Test abydos.stemmer._sb_r2."""
        _vowels = set('aeiouy')
        # base case
        self.assertEqual(_sb_r2('', _vowels), 0)

        # examples from http://snowball.tartarus.org/texts/r1r2.html
        self.assertEqual(_sb_r2('beautiful', _vowels), 7)
        self.assertEqual(_sb_r2('beauty', _vowels), 6)
        self.assertEqual(_sb_r2('beau', _vowels), 4)
        self.assertEqual(_sb_r2('animadversion', _vowels), 4)
        self.assertEqual(_sb_r2('sprinkled', _vowels), 9)
        self.assertEqual(_sb_r2('eucharist', _vowels), 6)

    def test_sb_ends_in_short_syllable(self):
        """Test abydos.stemmer._sb_ends_in_short_syllable."""
        _vowels = set('aeiouy')
        _codanonvowels = set('bcdfghjklmnpqrstvz\'')
        # base case
        self.assertFalse(_sb_ends_in_short_syllable('', _vowels,
                                                    _codanonvowels))

        # examples from
        # http://snowball.tartarus.org/algorithms/english/stemmer.html
        self.assertTrue(_sb_ends_in_short_syllable('rap', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('trap', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('entrap', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('ow', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('on', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('at', _vowels,
                                                   _codanonvowels))
        # 'uproot' was previously asserted twice; one check is sufficient
        self.assertFalse(_sb_ends_in_short_syllable('uproot', _vowels,
                                                    _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('bestow', _vowels,
                                                    _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('disturb', _vowels,
                                                    _codanonvowels))

        # missed branch test cases
        self.assertFalse(_sb_ends_in_short_syllable('d', _vowels,
                                                    _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('a', _vowels,
                                                    _codanonvowels))

    def test_sb_short_word(self):
        """Test abydos.stemmer._sb_short_word."""
        _vowels = set('aeiouy')
        _codanonvowels = set('bcdfghjklmnpqrstvz\'')
        # base case
        self.assertFalse(_sb_short_word('', _vowels, _codanonvowels))

        # examples from
        # http://snowball.tartarus.org/algorithms/english/stemmer.html
        self.assertTrue(_sb_short_word('bed', _vowels, _codanonvowels))
        self.assertTrue(_sb_short_word('shed', _vowels, _codanonvowels))
        self.assertTrue(_sb_short_word('shred', _vowels, _codanonvowels))
        self.assertFalse(_sb_short_word('bead', _vowels, _codanonvowels))
        self.assertFalse(_sb_short_word('embed', _vowels, _codanonvowels))
        self.assertFalse(_sb_short_word('beds', _vowels, _codanonvowels))

    def test_porter2(self):
        """Test abydos.stemmer.porter2."""
        # base case
        self.assertEqual(porter2(''), '')

        # simple cases
        self.assertEqual(porter2('c'), 'c')
        self.assertEqual(porter2('da'), 'da')
        self.assertEqual(porter2('ad'), 'ad')
        self.assertEqual(porter2('sing'), 'sing')
        self.assertEqual(porter2('singing'), 'sing')

        # missed branch test cases
        self.assertEqual(porter2('capitalism'), 'capit')
        self.assertEqual(porter2('fatalism'), 'fatal')
        self.assertEqual(porter2('dog\'s'), 'dog')
        self.assertEqual(porter2('A\'s\''), 'a')
        self.assertEqual(porter2('agreedly'), 'agre')
        self.assertEqual(porter2('feedly'), 'feed')
        self.assertEqual(porter2('stional'), 'stional')
        self.assertEqual(porter2('palism'), 'palism')
        self.assertEqual(porter2('sization'), 'sizat')
        self.assertEqual(porter2('licated'), 'licat')
        self.assertEqual(porter2('lical'), 'lical')
        self.assertEqual(porter2('clessly'), 'clessli')
        self.assertEqual(porter2('tably'), 'tabli')
        self.assertEqual(porter2('sizer'), 'sizer')
        self.assertEqual(porter2('livity'), 'liviti')

    def test_porter2_early_english(self):
        """Test abydos.stemmer.porter2 (early English)."""
        # base case
        self.assertEqual(porter2('', early_english=True), '')

        # simple cases (no different from regular stemmer)
        self.assertEqual(porter2('c', early_english=True), 'c')
        self.assertEqual(porter2('da', early_english=True), 'da')
        self.assertEqual(porter2('ad', early_english=True), 'ad')
        self.assertEqual(porter2('sing', early_english=True), 'sing')
        self.assertEqual(porter2('singing', early_english=True), 'sing')

        # make
        self.assertEqual(porter2('make', early_english=True), 'make')
        self.assertEqual(porter2('makes', early_english=True), 'make')
        self.assertEqual(porter2('maketh', early_english=True), 'make')
        self.assertEqual(porter2('makest', early_english=True), 'make')

        # say
        self.assertEqual(porter2('say', early_english=True), 'say')
        self.assertEqual(porter2('says', early_english=True), 'say')
        self.assertEqual(porter2('sayeth', early_english=True), 'say')
        self.assertEqual(porter2('sayest', early_english=True), 'say')

        # missed branch test cases
        self.assertEqual(porter2('best', early_english=True), 'best')
        self.assertEqual(porter2('meth', early_english=True), 'meth')

    def test_porter2_snowball(self):
        """Test abydos.stemmer.porter2 (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/english/diffs.txt
        """
        # Snowball Porter2 test set
        # Decode explicitly as UTF-8 (like the other corpus tests in this
        # module) so the result does not depend on the platform's locale.
        with codecs.open(_corpus_file('snowball_porter2.csv'),
                         encoding='utf-8') as snowball_ts:
            next(snowball_ts)  # skip the first line of the file
            for line in snowball_ts:
                # lines starting with '#' are not test cases
                if line[0] != '#':
                    line = line.strip().split(',')
                    word, stem = line[0], line[1]
                    self.assertEqual(porter2(word), stem.lower())
| 391 | |||
| 392 | |||
class SnowballTestCases(unittest.TestCase):
    """Test Snowball functions.

    abydos.stemmer.sb_german, abydos.stemmer.sb_dutch,
    abydos.stemmer.sb_norwegian, abydos.stemmer.sb_swedish, &
    abydos.stemmer.sb_danish
    """

    def _check_corpus(self, stemmer, filename):
        """Assert that a stemmer reproduces every stem in a corpus file.

        Shared by the per-language corpus tests so the file-reading loop
        exists in exactly one place.

        :param stemmer: callable that takes a word and returns its stem
        :param filename: name of the CSV file, resolved via _corpus_file;
            each data line is ``word,stem``
        """
        with codecs.open(_corpus_file(filename),
                         encoding='utf-8') as snowball_ts:
            # skip the first line (presumably a column header -- confirm
            # against the corpus files)
            next(snowball_ts)
            for line in snowball_ts:
                # lines starting with '#' are not test cases
                if line[0] != '#':
                    fields = line.strip().split(',')
                    word, stem = fields[0], fields[1]
                    self.assertEqual(stemmer(word), stem.lower())

    def test_sb_german_snowball(self):
        """Test abydos.stemmer.sb_german (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/german/diffs.txt
        """
        # base case
        self.assertEqual(sb_german(''), '')

        # Snowball German test set
        self._check_corpus(sb_german, 'snowball_german.csv')

        # missed branch test cases
        self.assertEqual(sb_german('ikeit'), 'ikeit')

    def test_sb_german_snowball_alt(self):
        """Test abydos.stemmer.sb_german (alternate vowels)."""
        # base case
        self.assertEqual(sb_german('', alternate_vowels=True), '')

        # dämmerung,dammer
        self.assertEqual(sb_german('dämmerung', alternate_vowels=True),
                         'dammer')
        self.assertEqual(sb_german('daemmerung', alternate_vowels=True),
                         'dammer')
        self.assertEqual(sb_german('dämmerung'), 'dammer')
        self.assertEqual(sb_german('daemmerung'), 'daemmer')

        # brötchen,brotch
        self.assertEqual(sb_german('brötchen', alternate_vowels=True),
                         'brotch')
        self.assertEqual(sb_german('broetchen', alternate_vowels=True),
                         'brotch')
        self.assertEqual(sb_german('brötchen'), 'brotch')
        self.assertEqual(sb_german('broetchen'), 'broetch')

        # büro,buro
        self.assertEqual(sb_german('büro', alternate_vowels=True), 'buro')
        self.assertEqual(sb_german('buero', alternate_vowels=True), 'buro')
        self.assertEqual(sb_german('büro'), 'buro')
        self.assertEqual(sb_german('buero'), 'buero')

        # häufen,hauf
        self.assertEqual(sb_german('häufen', alternate_vowels=True), 'hauf')
        self.assertEqual(sb_german('haeufen', alternate_vowels=True), 'hauf')
        self.assertEqual(sb_german('häufen'), 'hauf')
        self.assertEqual(sb_german('haeufen'), 'haeuf')

        # quelle,quell
        self.assertEqual(sb_german('qülle', alternate_vowels=True), 'qull')
        self.assertEqual(sb_german('quelle', alternate_vowels=True), 'quell')
        self.assertEqual(sb_german('qülle'), 'qull')
        self.assertEqual(sb_german('quelle'), 'quell')

        # feuer,feuer
        self.assertEqual(sb_german('feür', alternate_vowels=True), 'feur')
        self.assertEqual(sb_german('feuer', alternate_vowels=True), 'feu')
        self.assertEqual(sb_german('feür'), 'feur')
        self.assertEqual(sb_german('feuer'), 'feu')

        # über,uber
        self.assertEqual(sb_german('über', alternate_vowels=True), 'uber')
        self.assertEqual(sb_german('ueber', alternate_vowels=True), 'uber')
        self.assertEqual(sb_german('über'), 'uber')
        self.assertEqual(sb_german('ueber'), 'ueb')

    def test_sb_dutch_snowball(self):
        """Test abydos.stemmer.sb_dutch (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/dutch/diffs.txt
        """
        # base case
        self.assertEqual(sb_dutch(''), '')

        # Snowball Dutch test set
        self._check_corpus(sb_dutch, 'snowball_dutch.csv')

        # missed branch test cases
        self.assertEqual(sb_dutch('zondulielijk'), 'zondulie')

    def test_sb_norwegian_snowball(self):
        """Test abydos.stemmer.sb_norwegian (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/norwegian/diffs.txt
        """
        # base case
        self.assertEqual(sb_norwegian(''), '')

        # Snowball Norwegian test set
        self._check_corpus(sb_norwegian, 'snowball_norwegian.csv')

    def test_sb_swedish_snowball(self):
        """Test abydos.stemmer.sb_swedish (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/swedish/diffs.txt
        """
        # base case
        self.assertEqual(sb_swedish(''), '')

        # Snowball Swedish test set
        self._check_corpus(sb_swedish, 'snowball_swedish.csv')

    def test_sb_danish_snowball(self):
        """Test abydos.stemmer.sb_danish (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/danish/diffs.txt
        """
        # base case
        self.assertEqual(sb_danish(''), '')

        # Snowball Danish test set
        self._check_corpus(sb_danish, 'snowball_danish.csv')
| 552 | |||
| 553 | |||
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
| 556 |