Code Duplication: duplicated blocks of 18-21 lines found in 5 locations

tests/test_stemmer.py: 5 locations

@@ 541-561 (lines=21) @@
538
        self.assertEqual(sb_german('über'), 'uber')
539
        self.assertEqual(sb_german('ueber'), 'ueb')
540
541
    def test_sb_dutch_snowball(self):
542
        """Test abydos.stemmer.sb_dutch (Snowball testset).
543
544
        These test cases are from
545
        http://snowball.tartarus.org/algorithms/dutch/diffs.txt
546
        """
547
        # base case
548
        self.assertEqual(sb_dutch(''), '')
549
550
        #  Snowball Dutch test set
551
        with codecs.open(TESTDIR+'/corpora/snowball_dutch.csv',
552
                         encoding='utf-8') as snowball_testset:
553
            next(snowball_testset)
554
            for line in snowball_testset:
555
                if line[0] != '#':
556
                    line = line.strip().split(',')
557
                    word, stem = line[0], line[1]
558
                    self.assertEqual(sb_dutch(word), stem.lower())
559
560
        # missed branch test cases
561
        self.assertEqual(sb_dutch('zondulielijk'), 'zondulie')
562
563
    def test_sb_norwegian_snowball(self):
564
        """Test abydos.stemmer.sb_norwegian (Snowball testset).
@@ 468-488 (lines=21) @@
465
    abydos.stemmer.sb_danish
466
    """
467
468
    def test_sb_german_snowball(self):
469
        """Test abydos.stemmer.sb_german (Snowball testset).
470
471
        These test cases are from
472
        http://snowball.tartarus.org/algorithms/german/diffs.txt
473
        """
474
        # base case
475
        self.assertEqual(sb_german(''), '')
476
477
        #  Snowball German test set
478
        with codecs.open(TESTDIR+'/corpora/snowball_german.csv',
479
                         encoding='utf-8') as snowball_testset:
480
            next(snowball_testset)
481
            for line in snowball_testset:
482
                if line[0] != '#':
483
                    line = line.strip().split(',')
484
                    word, stem = line[0], line[1]
485
                    self.assertEqual(sb_german(word), stem.lower())
486
487
        # missed branch test cases
488
        self.assertEqual(sb_german('ikeit'), 'ikeit')
489
490
    def test_sb_german_snowball_alt(self):
491
        """Test abydos.stemmer.sb_german (alternate vowels)."""
@@ 601-618 (lines=18) @@
598
                    word, stem = line[0], line[1]
599
                    self.assertEqual(sb_swedish(word), stem.lower())
600
601
    def test_sb_danish_snowball(self):
602
        """Test abydos.stemmer.sb_danish (Snowball testset).
603
604
        These test cases are from
605
        http://snowball.tartarus.org/algorithms/danish/diffs.txt
606
        """
607
        # base case
608
        self.assertEqual(sb_danish(''), '')
609
610
        #  Snowball Danish test set
611
        with codecs.open(TESTDIR+'/corpora/snowball_danish.csv',
612
                         encoding='utf-8') as snowball_testset:
613
            next(snowball_testset)
614
            for line in snowball_testset:
615
                if line[0] != '#':
616
                    line = line.strip().split(',')
617
                    word, stem = line[0], line[1]
618
                    self.assertEqual(sb_danish(word), stem.lower())
619
620
621
class CLEFTestCases(unittest.TestCase):
@@ 582-599 (lines=18) @@
579
                    word, stem = line[0], line[1]
580
                    self.assertEqual(sb_norwegian(word), stem.lower())
581
582
    def test_sb_swedish_snowball(self):
583
        """Test abydos.stemmer.sb_swedish (Snowball testset).
584
585
        These test cases are from
586
        http://snowball.tartarus.org/algorithms/swedish/diffs.txt
587
        """
588
        # base case
589
        self.assertEqual(sb_swedish(''), '')
590
591
        #  Snowball Swedish test set
592
        with codecs.open(TESTDIR+'/corpora/snowball_swedish.csv',
593
                         encoding='utf-8') as snowball_testset:
594
            next(snowball_testset)
595
            for line in snowball_testset:
596
                if line[0] != '#':
597
                    line = line.strip().split(',')
598
                    word, stem = line[0], line[1]
599
                    self.assertEqual(sb_swedish(word), stem.lower())
600
601
    def test_sb_danish_snowball(self):
602
        """Test abydos.stemmer.sb_danish (Snowball testset).
@@ 563-580 (lines=18) @@
560
        # missed branch test cases
561
        self.assertEqual(sb_dutch('zondulielijk'), 'zondulie')
562
563
    def test_sb_norwegian_snowball(self):
564
        """Test abydos.stemmer.sb_norwegian (Snowball testset).
565
566
        These test cases are from
567
        http://snowball.tartarus.org/algorithms/norwegian/diffs.txt
568
        """
569
        # base case
570
        self.assertEqual(sb_norwegian(''), '')
571
572
        #  Snowball Norwegian test set
573
        with codecs.open(TESTDIR+'/corpora/snowball_norwegian.csv',
574
                         encoding='utf-8') as snowball_testset:
575
            next(snowball_testset)
576
            for line in snowball_testset:
577
                if line[0] != '#':
578
                    line = line.strip().split(',')
579
                    word, stem = line[0], line[1]
580
                    self.assertEqual(sb_norwegian(word), stem.lower())
581
582
    def test_sb_swedish_snowball(self):
583
        """Test abydos.stemmer.sb_swedish (Snowball testset).