Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.stemmer.test_stemmer_snowball   A

Complexity

Total Complexity 41

Size/Duplication

Total Lines 556
Duplicated Lines 7.55 %

Importance

Changes 0
Metric Value
eloc 330
dl 42
loc 556
rs 9.1199
c 0
b 0
f 0
wmc 41

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complexity

Tip: Before tackling complexity, make sure that you eliminate any duplication first. This can often reduce the size of classes significantly.

Complex classes like tests.stemmer.test_stemmer_snowball often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_stemmer_snowball.
20
21
This module contains unit tests for abydos.stemmer.snowball
22
"""
23
24
from __future__ import unicode_literals
25
26
import codecs
27
import unittest
28
29
from abydos.stemmer.snowball import _ends_in_cvc, _ends_in_doubled_cons, \
30
    _m_degree, _sb_ends_in_short_syllable, _sb_has_vowel, _sb_r1, _sb_r2, \
31
    _sb_short_word, porter, porter2, sb_danish, sb_dutch, sb_german, \
32
    sb_norwegian, sb_swedish
33
34
from .. import _corpus_file
35
36
37
class PorterTestCases(unittest.TestCase):
    """Test Porter functions.

    abydos.stemmer._m_degree, abydos.stemmer.porter,
    abydos.stemmer._sb_has_vowel, abydos.stemmer._ends_in_doubled_cons,
    & abydos.stemmer._ends_in_cvc
    """

    def test_m_degree(self):
        """Test abydos.stemmer._m_degree."""
        _vowels = set('aeiouy')
        # base case
        self.assertEqual(_m_degree('', _vowels), 0)

        # m==0
        self.assertEqual(_m_degree('tr', _vowels), 0)
        self.assertEqual(_m_degree('ee', _vowels), 0)
        self.assertEqual(_m_degree('tree', _vowels), 0)
        self.assertEqual(_m_degree('y', _vowels), 0)
        self.assertEqual(_m_degree('by', _vowels), 0)

        # m==1
        self.assertEqual(_m_degree('trouble', _vowels), 1)
        self.assertEqual(_m_degree('oats', _vowels), 1)
        self.assertEqual(_m_degree('trees', _vowels), 1)
        self.assertEqual(_m_degree('ivy', _vowels), 1)

        # m==2
        self.assertEqual(_m_degree('troubles', _vowels), 2)
        self.assertEqual(_m_degree('private', _vowels), 2)
        self.assertEqual(_m_degree('oaten', _vowels), 2)
        self.assertEqual(_m_degree('orrery', _vowels), 2)

    def test_has_vowel(self):
        """Test abydos.stemmer._sb_has_vowel."""
        _vowels = set('aeiouy')
        # base case
        self.assertFalse(_sb_has_vowel('', _vowels))

        # False cases
        # (uppercase 'Y' is not a member of the lowercase vowel set)
        self.assertFalse(_sb_has_vowel('b', _vowels))
        self.assertFalse(_sb_has_vowel('c', _vowels))
        self.assertFalse(_sb_has_vowel('bc', _vowels))
        self.assertFalse(_sb_has_vowel('bcdfghjklmnpqrstvwxYz', _vowels))
        self.assertFalse(_sb_has_vowel('Y', _vowels))

        # True cases
        self.assertTrue(_sb_has_vowel('a', _vowels))
        self.assertTrue(_sb_has_vowel('e', _vowels))
        self.assertTrue(_sb_has_vowel('ae', _vowels))
        self.assertTrue(_sb_has_vowel('aeiouy', _vowels))
        self.assertTrue(_sb_has_vowel('y', _vowels))

        self.assertTrue(_sb_has_vowel('ade', _vowels))
        self.assertTrue(_sb_has_vowel('cad', _vowels))
        self.assertTrue(_sb_has_vowel('add', _vowels))
        self.assertTrue(_sb_has_vowel('phi', _vowels))
        self.assertTrue(_sb_has_vowel('pfy', _vowels))

        # same word as above, but with uppercase 'Y' in place of 'y'
        self.assertFalse(_sb_has_vowel('pfY', _vowels))

    def test_ends_in_doubled_cons(self):
        """Test abydos.stemmer._ends_in_doubled_cons."""
        _vowels = set('aeiouy')
        # base case
        self.assertFalse(_ends_in_doubled_cons('', _vowels))

        # False cases
        self.assertFalse(_ends_in_doubled_cons('b', _vowels))
        self.assertFalse(_ends_in_doubled_cons('c', _vowels))
        self.assertFalse(_ends_in_doubled_cons('bc', _vowels))
        self.assertFalse(_ends_in_doubled_cons('bcdfghjklmnpqrstvwxYz',
                                               _vowels))
        self.assertFalse(_ends_in_doubled_cons('Y', _vowels))
        self.assertFalse(_ends_in_doubled_cons('a', _vowels))
        self.assertFalse(_ends_in_doubled_cons('e', _vowels))
        self.assertFalse(_ends_in_doubled_cons('ae', _vowels))
        self.assertFalse(_ends_in_doubled_cons('aeiouy', _vowels))
        self.assertFalse(_ends_in_doubled_cons('y', _vowels))
        self.assertFalse(_ends_in_doubled_cons('ade', _vowels))
        self.assertFalse(_ends_in_doubled_cons('cad', _vowels))
        self.assertFalse(_ends_in_doubled_cons('phi', _vowels))
        self.assertFalse(_ends_in_doubled_cons('pfy', _vowels))
        self.assertFalse(_ends_in_doubled_cons('faddy', _vowels))
        # doubled final letters that are vowels do not count
        self.assertFalse(_ends_in_doubled_cons('aiii', _vowels))
        self.assertFalse(_ends_in_doubled_cons('ayyy', _vowels))

        # True cases
        self.assertTrue(_ends_in_doubled_cons('add', _vowels))
        self.assertTrue(_ends_in_doubled_cons('fadd', _vowels))
        self.assertTrue(_ends_in_doubled_cons('fadddd', _vowels))
        self.assertTrue(_ends_in_doubled_cons('raYY', _vowels))
        self.assertTrue(_ends_in_doubled_cons('doll', _vowels))
        self.assertTrue(_ends_in_doubled_cons('parr', _vowels))
        self.assertTrue(_ends_in_doubled_cons('parrr', _vowels))
        self.assertTrue(_ends_in_doubled_cons('bacc', _vowels))

    def test_ends_in_cvc(self):
        """Test abydos.stemmer._ends_in_cvc."""
        _vowels = set('aeiouy')
        # base case
        self.assertFalse(_ends_in_cvc('', _vowels))

        # False cases
        self.assertFalse(_ends_in_cvc('b', _vowels))
        self.assertFalse(_ends_in_cvc('c', _vowels))
        self.assertFalse(_ends_in_cvc('bc', _vowels))
        self.assertFalse(_ends_in_cvc('bcdfghjklmnpqrstvwxYz', _vowels))
        self.assertFalse(_ends_in_cvc('YYY', _vowels))
        self.assertFalse(_ends_in_cvc('ddd', _vowels))
        self.assertFalse(_ends_in_cvc('faaf', _vowels))
        self.assertFalse(_ends_in_cvc('rare', _vowels))
        self.assertFalse(_ends_in_cvc('rhy', _vowels))

        # True cases
        self.assertTrue(_ends_in_cvc('dad', _vowels))
        self.assertTrue(_ends_in_cvc('phad', _vowels))
        self.assertTrue(_ends_in_cvc('faded', _vowels))
        self.assertTrue(_ends_in_cvc('maYor', _vowels))
        self.assertTrue(_ends_in_cvc('enlil', _vowels))
        self.assertTrue(_ends_in_cvc('parer', _vowels))
        self.assertTrue(_ends_in_cvc('padres', _vowels))
        self.assertTrue(_ends_in_cvc('bacyc', _vowels))

        # Special case for W, X, & Y
        # (a final w, x, or y does not count as the closing consonant)
        self.assertFalse(_ends_in_cvc('craw', _vowels))
        self.assertFalse(_ends_in_cvc('max', _vowels))
        self.assertFalse(_ends_in_cvc('cray', _vowels))

    def test_porter(self):
        """Test abydos.stemmer.porter."""
        # base case
        self.assertEqual(porter(''), '')

        # simple cases
        self.assertEqual(porter('c'), 'c')
        self.assertEqual(porter('da'), 'da')
        self.assertEqual(porter('ad'), 'ad')
        self.assertEqual(porter('sing'), 'sing')
        self.assertEqual(porter('singing'), 'sing')

        # missed branch test cases
        self.assertEqual(porter('capitalism'), 'capit')
        self.assertEqual(porter('fatalism'), 'fatal')
        self.assertEqual(porter('stional'), 'stional')
        self.assertEqual(porter('palism'), 'palism')
        self.assertEqual(porter('sization'), 'sizat')
        self.assertEqual(porter('licated'), 'licat')
        self.assertEqual(porter('lical'), 'lical')

    def test_porter_early_english(self):
        """Test abydos.stemmer.porter (early English)."""
        # base case
        self.assertEqual(porter('', early_english=True), '')

        # simple cases (no different from regular stemmer)
        self.assertEqual(porter('c', early_english=True), 'c')
        self.assertEqual(porter('da', early_english=True), 'da')
        self.assertEqual(porter('ad', early_english=True), 'ad')
        self.assertEqual(porter('sing', early_english=True), 'sing')
        self.assertEqual(porter('singing', early_english=True), 'sing')

        # make
        self.assertEqual(porter('make', early_english=True), 'make')
        self.assertEqual(porter('makes', early_english=True), 'make')
        self.assertEqual(porter('maketh', early_english=True), 'make')
        self.assertEqual(porter('makest', early_english=True), 'make')

        # say
        self.assertEqual(porter('say', early_english=True), 'sai')
        self.assertEqual(porter('says', early_english=True), 'sai')
        self.assertEqual(porter('sayeth', early_english=True), 'sai')
        self.assertEqual(porter('sayest', early_english=True), 'sai')

        # missed branch test cases
        self.assertEqual(porter('best', early_english=True), 'best')
        self.assertEqual(porter('meth', early_english=True), 'meth')

    def test_porter_snowball(self):
        """Test abydos.stemmer.porter (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/porter/diffs.txt
        """
        #  Snowball Porter test set
        # Decode explicitly as UTF-8 instead of relying on the platform
        # default encoding, so the test reads the corpus identically on
        # every system.
        with codecs.open(_corpus_file('snowball_porter.csv'),
                         encoding='utf-8') as snowball_ts:
            # discard the first line (presumably a header row)
            next(snowball_ts)
            for line in snowball_ts:
                # skip comment lines and blank lines; a blank line has no
                # second field and would otherwise raise IndexError
                if line[0] == '#' or not line.strip():
                    continue
                fields = line.strip().split(',')
                word, stem = fields[0], fields[1]
                self.assertEqual(porter(word), stem.lower())
229
230
231
class Porter2TestCases(unittest.TestCase):
    """Test Porter2 functions.

    abydos.stemmer._sb_r1, abydos.stemmer._sb_r2,
    abydos.stemmer._sb_ends_in_short_syllable, abydos.stemmer._sb_short_word,
    & abydos.stemmer.porter2
    """

    def test_sb_r1(self):
        """Test abydos.stemmer._sb_r1."""
        _vowels = set('aeiouy')
        # base case
        self.assertEqual(_sb_r1('', _vowels), 0)

        # examples from http://snowball.tartarus.org/texts/r1r2.html
        self.assertEqual(_sb_r1('beautiful', _vowels), 5)
        self.assertEqual(_sb_r1('beauty', _vowels), 5)
        self.assertEqual(_sb_r1('beau', _vowels), 4)
        self.assertEqual(_sb_r1('animadversion', _vowels), 2)
        self.assertEqual(_sb_r1('sprinkled', _vowels), 5)
        self.assertEqual(_sb_r1('eucharist', _vowels), 3)

    def test_sb_r2(self):
        """Test abydos.stemmer._sb_r2."""
        _vowels = set('aeiouy')
        # base case
        self.assertEqual(_sb_r2('', _vowels), 0)

        # examples from http://snowball.tartarus.org/texts/r1r2.html
        self.assertEqual(_sb_r2('beautiful', _vowels), 7)
        self.assertEqual(_sb_r2('beauty', _vowels), 6)
        self.assertEqual(_sb_r2('beau', _vowels), 4)
        self.assertEqual(_sb_r2('animadversion', _vowels), 4)
        self.assertEqual(_sb_r2('sprinkled', _vowels), 9)
        self.assertEqual(_sb_r2('eucharist', _vowels), 6)

    def test_sb_ends_in_short_syllable(self):
        """Test abydos.stemmer._sb_ends_in_short_syllable."""
        _vowels = set('aeiouy')
        _codanonvowels = set('bcdfghjklmnpqrstvz\'')
        # base case
        self.assertFalse(_sb_ends_in_short_syllable('', _vowels,
                                                    _codanonvowels))

        # examples from
        # http://snowball.tartarus.org/algorithms/english/stemmer.html
        self.assertTrue(_sb_ends_in_short_syllable('rap', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('trap', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('entrap', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('ow', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('on', _vowels,
                                                   _codanonvowels))
        self.assertTrue(_sb_ends_in_short_syllable('at', _vowels,
                                                   _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('uproot', _vowels,
                                                    _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('bestow', _vowels,
                                                    _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('disturb', _vowels,
                                                    _codanonvowels))

        # missed branch test cases
        self.assertFalse(_sb_ends_in_short_syllable('d', _vowels,
                                                    _codanonvowels))
        self.assertFalse(_sb_ends_in_short_syllable('a', _vowels,
                                                    _codanonvowels))

    def test_sb_short_word(self):
        """Test abydos.stemmer._sb_short_word."""
        _vowels = set('aeiouy')
        _codanonvowels = set('bcdfghjklmnpqrstvz\'')
        # base case
        self.assertFalse(_sb_short_word('', _vowels, _codanonvowels))

        # examples from
        # http://snowball.tartarus.org/algorithms/english/stemmer.html
        self.assertTrue(_sb_short_word('bed', _vowels, _codanonvowels))
        self.assertTrue(_sb_short_word('shed', _vowels, _codanonvowels))
        self.assertTrue(_sb_short_word('shred', _vowels, _codanonvowels))
        self.assertFalse(_sb_short_word('bead', _vowels, _codanonvowels))
        self.assertFalse(_sb_short_word('embed', _vowels, _codanonvowels))
        self.assertFalse(_sb_short_word('beds', _vowels, _codanonvowels))

    def test_porter2(self):
        """Test abydos.stemmer.porter2."""
        # base case
        self.assertEqual(porter2(''), '')

        # simple cases
        self.assertEqual(porter2('c'), 'c')
        self.assertEqual(porter2('da'), 'da')
        self.assertEqual(porter2('ad'), 'ad')
        self.assertEqual(porter2('sing'), 'sing')
        self.assertEqual(porter2('singing'), 'sing')

        # missed branch test cases
        self.assertEqual(porter2('capitalism'), 'capit')
        self.assertEqual(porter2('fatalism'), 'fatal')
        self.assertEqual(porter2('dog\'s'), 'dog')
        self.assertEqual(porter2('A\'s\''), 'a')
        self.assertEqual(porter2('agreedly'), 'agre')
        self.assertEqual(porter2('feedly'), 'feed')
        self.assertEqual(porter2('stional'), 'stional')
        self.assertEqual(porter2('palism'), 'palism')
        self.assertEqual(porter2('sization'), 'sizat')
        self.assertEqual(porter2('licated'), 'licat')
        self.assertEqual(porter2('lical'), 'lical')
        self.assertEqual(porter2('clessly'), 'clessli')
        self.assertEqual(porter2('tably'), 'tabli')
        self.assertEqual(porter2('sizer'), 'sizer')
        self.assertEqual(porter2('livity'), 'liviti')

    def test_porter2_early_english(self):
        """Test abydos.stemmer.porter2 (early English)."""
        # base case
        self.assertEqual(porter2('', early_english=True), '')

        # simple cases (no different from regular stemmer)
        self.assertEqual(porter2('c', early_english=True), 'c')
        self.assertEqual(porter2('da', early_english=True), 'da')
        self.assertEqual(porter2('ad', early_english=True), 'ad')
        self.assertEqual(porter2('sing', early_english=True), 'sing')
        self.assertEqual(porter2('singing', early_english=True), 'sing')

        # make
        self.assertEqual(porter2('make', early_english=True), 'make')
        self.assertEqual(porter2('makes', early_english=True), 'make')
        self.assertEqual(porter2('maketh', early_english=True), 'make')
        self.assertEqual(porter2('makest', early_english=True), 'make')

        # say
        self.assertEqual(porter2('say', early_english=True), 'say')
        self.assertEqual(porter2('says', early_english=True), 'say')
        self.assertEqual(porter2('sayeth', early_english=True), 'say')
        self.assertEqual(porter2('sayest', early_english=True), 'say')

        # missed branch test cases
        self.assertEqual(porter2('best', early_english=True), 'best')
        self.assertEqual(porter2('meth', early_english=True), 'meth')

    def test_porter2_snowball(self):
        """Test abydos.stemmer.porter2 (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/english/diffs.txt
        """
        #  Snowball Porter2 test set
        # Decode explicitly as UTF-8 instead of relying on the platform
        # default encoding, so the test reads the corpus identically on
        # every system.
        with codecs.open(_corpus_file('snowball_porter2.csv'),
                         encoding='utf-8') as snowball_ts:
            # discard the first line (presumably a header row)
            next(snowball_ts)
            for line in snowball_ts:
                # skip comment lines and blank lines; a blank line has no
                # second field and would otherwise raise IndexError
                if line[0] == '#' or not line.strip():
                    continue
                fields = line.strip().split(',')
                word, stem = fields[0], fields[1]
                self.assertEqual(porter2(word), stem.lower())
391
392
393
class SnowballTestCases(unittest.TestCase):
    """Test Snowball functions.

    abydos.stemmer.sb_german, abydos.stemmer.sb_dutch,
    abydos.stemmer.sb_norwegian, abydos.stemmer.sb_swedish, &
    abydos.stemmer.sb_danish
    """

    def _assert_corpus_stems(self, corpus_name, stemmer):
        """Check a stemmer against each word,stem pair of a corpus file.

        The file is located with _corpus_file, read as UTF-8, and its first
        line is discarded (presumably a header row). Lines beginning with
        '#' and blank lines are skipped. For every remaining line, the
        first comma-separated field is stemmed and compared with the
        lowercased second field.

        :param corpus_name: file name to pass to _corpus_file
        :param stemmer: callable that takes a word and returns its stem
        """
        with codecs.open(_corpus_file(corpus_name),
                         encoding='utf-8') as snowball_ts:
            next(snowball_ts)
            for line in snowball_ts:
                # a blank line has no second field and would otherwise
                # raise IndexError below
                if line[0] == '#' or not line.strip():
                    continue
                fields = line.strip().split(',')
                word, stem = fields[0], fields[1]
                self.assertEqual(stemmer(word), stem.lower())

    def test_sb_german_snowball(self):
        """Test abydos.stemmer.sb_german (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/german/diffs.txt
        """
        # base case
        self.assertEqual(sb_german(''), '')

        #  Snowball German test set
        self._assert_corpus_stems('snowball_german.csv', sb_german)

        # missed branch test cases
        self.assertEqual(sb_german('ikeit'), 'ikeit')

    def test_sb_german_snowball_alt(self):
        """Test abydos.stemmer.sb_german (alternate vowels)."""
        # base case
        self.assertEqual(sb_german('', alternate_vowels=True), '')

        # Each group below stems an umlaut spelling and its ae/oe/ue
        # spelling, first with alternate_vowels=True and then with the
        # default setting.

        # dämmerung,dammer
        self.assertEqual(sb_german('dämmerung', alternate_vowels=True),
                         'dammer')
        self.assertEqual(sb_german('daemmerung', alternate_vowels=True),
                         'dammer')
        self.assertEqual(sb_german('dämmerung'), 'dammer')
        self.assertEqual(sb_german('daemmerung'), 'daemmer')

        # brötchen,brotch
        self.assertEqual(sb_german('brötchen', alternate_vowels=True),
                         'brotch')
        self.assertEqual(sb_german('broetchen', alternate_vowels=True),
                         'brotch')
        self.assertEqual(sb_german('brötchen'), 'brotch')
        self.assertEqual(sb_german('broetchen'), 'broetch')

        # büro,buro
        self.assertEqual(sb_german('büro', alternate_vowels=True), 'buro')
        self.assertEqual(sb_german('buero', alternate_vowels=True), 'buro')
        self.assertEqual(sb_german('büro'), 'buro')
        self.assertEqual(sb_german('buero'), 'buero')

        # häufen,hauf
        self.assertEqual(sb_german('häufen', alternate_vowels=True), 'hauf')
        self.assertEqual(sb_german('haeufen', alternate_vowels=True), 'hauf')
        self.assertEqual(sb_german('häufen'), 'hauf')
        self.assertEqual(sb_german('haeufen'), 'haeuf')

        # quelle,quell
        self.assertEqual(sb_german('qülle', alternate_vowels=True), 'qull')
        self.assertEqual(sb_german('quelle', alternate_vowels=True), 'quell')
        self.assertEqual(sb_german('qülle'), 'qull')
        self.assertEqual(sb_german('quelle'), 'quell')

        # feuer,feuer
        self.assertEqual(sb_german('feür', alternate_vowels=True), 'feur')
        self.assertEqual(sb_german('feuer', alternate_vowels=True), 'feu')
        self.assertEqual(sb_german('feür'), 'feur')
        self.assertEqual(sb_german('feuer'), 'feu')

        # über,uber
        self.assertEqual(sb_german('über', alternate_vowels=True), 'uber')
        self.assertEqual(sb_german('ueber', alternate_vowels=True), 'uber')
        self.assertEqual(sb_german('über'), 'uber')
        self.assertEqual(sb_german('ueber'), 'ueb')

    def test_sb_dutch_snowball(self):
        """Test abydos.stemmer.sb_dutch (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/dutch/diffs.txt
        """
        # base case
        self.assertEqual(sb_dutch(''), '')

        #  Snowball Dutch test set
        self._assert_corpus_stems('snowball_dutch.csv', sb_dutch)

        # missed branch test cases
        self.assertEqual(sb_dutch('zondulielijk'), 'zondulie')

    def test_sb_norwegian_snowball(self):
        """Test abydos.stemmer.sb_norwegian (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/norwegian/diffs.txt
        """
        # base case
        self.assertEqual(sb_norwegian(''), '')

        #  Snowball Norwegian test set
        self._assert_corpus_stems('snowball_norwegian.csv', sb_norwegian)

    def test_sb_swedish_snowball(self):
        """Test abydos.stemmer.sb_swedish (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/swedish/diffs.txt
        """
        # base case
        self.assertEqual(sb_swedish(''), '')

        #  Snowball Swedish test set
        self._assert_corpus_stems('snowball_swedish.csv', sb_swedish)

    def test_sb_danish_snowball(self):
        """Test abydos.stemmer.sb_danish (Snowball testset).

        These test cases are from
        http://snowball.tartarus.org/algorithms/danish/diffs.txt
        """
        # base case
        self.assertEqual(sb_danish(''), '')

        #  Snowball Danish test set
        self._assert_corpus_stems('snowball_danish.csv', sb_danish)
552
553
554
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
556