| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                # -*- coding: utf-8 -*-  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                # Copyright 2014-2018 by Christopher C. Little.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                # This file is part of Abydos.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                #  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                # Abydos is free software: you can redistribute it and/or modify  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                # it under the terms of the GNU General Public License as published by  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                # the Free Software Foundation, either version 3 of the License, or  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                # (at your option) any later version.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                #  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                # Abydos is distributed in the hope that it will be useful,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                # but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                # GNU General Public License for more details.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                #  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                # You should have received a copy of the GNU General Public License  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                # along with Abydos. If not, see <http://www.gnu.org/licenses/>.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                """abydos.tests.test_stemmer.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                This module contains unit tests for abydos.stemmer  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                from __future__ import unicode_literals  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                import codecs  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                import os  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                import unittest  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                from abydos.stemmer import _ends_in_cvc, _ends_in_doubled_cons, _m_degree, \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                    _sb_ends_in_short_syllable, _sb_has_vowel, _sb_r1, _sb_r2, \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                    _sb_short_word, caumanns, clef_german, clef_german_plus, clef_swedish, \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                    lovins, paice_husk, porter, porter2, sb_danish, sb_dutch, sb_german, \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                    sb_norwegian, sb_swedish, uealite  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                TESTDIR = os.path.dirname(__file__)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                class LovinsTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                    """Test Lovins functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.lovins  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                    def test_lovins(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.lovins."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                        # test cases from Lovins' "Development of a Stemming Algorithm":  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                        # http://www.mt-archive.info/MT-1968-Lovins.pdf  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnesia'), 'magnes') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnesite'), 'magnes') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnesian'), 'magnes') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    55
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnesium'), 'magnes') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnet'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    57
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetic'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    58
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magneto'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    59
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetically'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetism'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    61
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetite'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetitic'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetizable'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetization'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetize'), 'magnet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetometer'), 'magnetometer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetometric'), 'magnetometer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetometry'), 'magnetometer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetomotive'), 'magnetomot') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    70
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('magnetron'), 'magnetron') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metal'), 'metal') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metall'), 'metal') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metallically'), 'metal') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metalliferous'), 'metallifer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metallize'), 'metal') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metallurgical'), 'metallurg') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    77
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('metallurgy'), 'metallurg') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    78
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('induction'), 'induc') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('inductance'), 'induc') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('induced'), 'induc') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    81
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('angular'), 'angl') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('angle'), 'angl') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(lovins('feminism'), 'fem') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                    def test_lovins_snowball(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.lovins (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    91
                 | 
                                    
                                                     | 
                
                 | 
                        https://github.com/snowballstem/snowball-data/tree/master/lovins  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    93
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Lovins test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    94
                 | 
                                    
                                                     | 
                
                 | 
                        with codecs.open(TESTDIR+'/corpora/snowball_lovins.csv',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                                         encoding='utf-8') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    96
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    97
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    100
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    101
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(lovins(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    102
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    103
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    104
                 | 
                                    
                                                     | 
                
                 | 
                class PorterTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                    """Test Porter functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer._m_degree, abydos.stemmer.porter,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    108
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer._sb_has_vowel, abydos.stemmer._ends_in_doubled_cons,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                    & abydos.stemmer._ends_in_cvc  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    112
                 | 
                                    
                                                     | 
                
                 | 
                    def test_m_degree(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._m_degree."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    116
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    117
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    118
                 | 
                                    
                                                     | 
                
                 | 
                        # m==0  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    119
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('tr', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    120
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('ee', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    121
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('tree', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    122
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('y', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    123
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('by', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    124
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    125
                 | 
                                    
                                                     | 
                
                 | 
                        # m==1  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    126
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('trouble', _vowels), 1) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    127
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('oats', _vowels), 1) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    128
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('trees', _vowels), 1) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    129
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('ivy', _vowels), 1) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    130
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    131
                 | 
                                    
                                                     | 
                
                 | 
                        # m==2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    132
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('troubles', _vowels), 2) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    133
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('private', _vowels), 2) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    134
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('oaten', _vowels), 2) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    135
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_m_degree('orrery', _vowels), 2) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    136
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    137
                 | 
                                    
                                                     | 
                
                 | 
                    def test_has_vowel(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    138
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._has_vowel."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    139
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    140
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    141
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    142
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    143
                 | 
                                    
                                                     | 
                
                 | 
                        # False cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    144
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('b', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    145
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('c', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    146
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('bc', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    147
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('bcdfghjklmnpqrstvwxYz', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    148
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('Y', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    149
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    150
                 | 
                                    
                                                     | 
                
                 | 
                        # True cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    151
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('a', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    152
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('e', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    153
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('ae', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    154
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('aeiouy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    155
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('y', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    156
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    157
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('ade', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    158
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('cad', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    159
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('add', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    160
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('phi', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    161
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_has_vowel('pfy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    162
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    163
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_has_vowel('pfY', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    164
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    165
                 | 
                                    
                                                     | 
                
                 | 
                    def test_ends_in_doubled_cons(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    166
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._ends_in_doubled_cons."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    167
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    168
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    169
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    170
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    171
                 | 
                                    
                                                     | 
                
                 | 
                        # False cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    172
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('b', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    173
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('c', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    174
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('bc', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    175
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('bcdfghjklmnpqrstvwxYz', | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    176
                 | 
                                    
                                                     | 
                
                 | 
                                                               _vowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    177
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('Y', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    178
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('a', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    179
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('e', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    180
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('ae', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    181
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('aeiouy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    182
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('y', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    183
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('ade', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    184
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('cad', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    185
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('phi', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    186
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('pfy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    187
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('faddy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    188
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('aiii', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    189
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_doubled_cons('ayyy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    190
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    191
                 | 
                                    
                                                     | 
                
                 | 
                        # True cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    192
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('add', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    193
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('fadd', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    194
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('fadddd', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    195
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('raYY', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    196
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('doll', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    197
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('parr', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    198
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('parrr', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    199
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_doubled_cons('bacc', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    200
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    201
                 | 
                                    
                                                     | 
                
                 | 
                    def test_ends_in_cvc(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    202
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._ends_in_cvc."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    203
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    204
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    205
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    206
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    207
                 | 
                                    
                                                     | 
                
                 | 
                        # False cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    208
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('b', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    209
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('c', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    210
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('bc', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    211
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('bcdfghjklmnpqrstvwxYz', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    212
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('YYY', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    213
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('ddd', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    214
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('faaf', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    215
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('rare', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    216
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('rhy', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    217
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    218
                 | 
                                    
                                                     | 
                
                 | 
                        # True cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    219
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('dad', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    220
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('phad', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    221
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('faded', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    222
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('maYor', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    223
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('enlil', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    224
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('parer', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    225
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('padres', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    226
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_ends_in_cvc('bacyc', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    227
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    228
                 | 
                                    
                                                     | 
                
                 | 
                        # Special case for W, X, & Y  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    229
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('craw', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    230
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('max', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    231
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_ends_in_cvc('cray', _vowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    232
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    233
                 | 
                                    
                                                     | 
                
                 | 
                    def test_porter(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    234
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.porter."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    235
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    236
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    237
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    238
                 | 
                                    
                                                     | 
                
                 | 
                        # simple cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    239
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('c'), 'c') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    240
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('da'), 'da') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    241
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('ad'), 'ad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    242
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('sing'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    243
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('singing'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    244
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    245
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    246
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('capitalism'), 'capit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    247
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('fatalism'), 'fatal') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    248
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('stional'), 'stional') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    249
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('palism'), 'palism') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    250
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('sization'), 'sizat') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    251
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('licated'), 'licat') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    252
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('lical'), 'lical') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    253
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    254
                 | 
                                    
                                                     | 
                
                 | 
                    def test_porter_early_english(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    255
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.porter (early English)."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    256
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    257
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('', early_english=True), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    258
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    259
                 | 
                                    
                                                     | 
                
                 | 
                        # simple cases (no different from regular stemmer)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    260
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('c', early_english=True), 'c') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    261
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('da', early_english=True), 'da') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    262
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('ad', early_english=True), 'ad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    263
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('sing', early_english=True), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    264
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('singing', early_english=True), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    265
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    266
                 | 
                                    
                                                     | 
                
                 | 
                        # make  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    267
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('make', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    268
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('makes', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    269
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('maketh', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    270
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('makest', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    271
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    272
                 | 
                                    
                                                     | 
                
                 | 
                        # say  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    273
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('say', early_english=True), 'sai') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    274
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('says', early_english=True), 'sai') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    275
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('sayeth', early_english=True), 'sai') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    276
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('sayest', early_english=True), 'sai') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    277
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    278
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    279
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('best', early_english=True), 'best') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    280
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter('meth', early_english=True), 'meth') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    281
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    282
                 | 
                                    
                                                     | 
                
                 | 
                    def test_porter_snowball(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    283
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.porter (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    284
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    285
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    286
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/porter/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    287
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    288
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Porter test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    289
                 | 
                                    
                                                     | 
                
                 | 
                        with open(TESTDIR+'/corpora/snowball_porter.csv') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    290
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    291
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    292
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    293
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    294
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    295
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(porter(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    296
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    297
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    298
                 | 
                                    
                                                     | 
                
                 | 
                class Porter2TestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    299
                 | 
                                    
                                                     | 
                
                 | 
                    """Test Porter2 functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    300
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    301
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer._sb_r1, abydos.stemmer._sb_r2,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    302
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer._sb_ends_in_short_syllable, abydos.stemmer._sb_short_word,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    303
                 | 
                                    
                                                     | 
                
                 | 
                    & abydos.stemmer.porter2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    304
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    305
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    306
                 | 
                                    
                                                     | 
                
                 | 
                    def test_sb_r1(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    307
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._sb_r1."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    308
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    309
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    310
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    311
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    312
                 | 
                                    
                                                     | 
                
                 | 
                        # examples from http://snowball.tartarus.org/texts/r1r2.html  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    313
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('beautiful', _vowels), 5) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    314
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('beauty', _vowels), 5) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    315
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('beau', _vowels), 4) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    316
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('animadversion', _vowels), 2) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    317
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('sprinkled', _vowels), 5) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    318
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r1('eucharist', _vowels), 3) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    319
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    320
                 | 
                                    
                                                     | 
                
                 | 
                    def test_sb_r2(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    321
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._sb_r2."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    322
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    323
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    324
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('', _vowels), 0) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    325
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    326
                 | 
                                    
                                                     | 
                
                 | 
                        # examples from http://snowball.tartarus.org/texts/r1r2.html  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    327
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('beautiful', _vowels), 7) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    328
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('beauty', _vowels), 6) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    329
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('beau', _vowels), 4) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    330
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('animadversion', _vowels), 4) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    331
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('sprinkled', _vowels), 9) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    332
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(_sb_r2('eucharist', _vowels), 6) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    333
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    334
                 | 
                                    
                                                     | 
                
                 | 
                    def test_sb_ends_in_short_syllable(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    335
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._sb_ends_in_short_syllable."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    336
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    337
                 | 
                                    
                                                     | 
                
                 | 
                        _codanonvowels = set('bcdfghjklmnpqrstvz\'') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    338
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    339
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    340
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    341
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    342
                 | 
                                    
                                                     | 
                
                 | 
                        # examples from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    343
                 | 
                                    
                                                     | 
                
                 | 
                        # http://snowball.tartarus.org/algorithms/english/stemmer.html  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    344
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_ends_in_short_syllable('rap', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    345
                 | 
                                    
                                                     | 
                
                 | 
                                                                   _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    346
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_ends_in_short_syllable('trap', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    347
                 | 
                                    
                                                     | 
                
                 | 
                                                                   _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    348
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_ends_in_short_syllable('entrap', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    349
                 | 
                                    
                                                     | 
                
                 | 
                                                                   _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    350
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_ends_in_short_syllable('ow', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    351
                 | 
                                    
                                                     | 
                
                 | 
                                                                   _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    352
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_ends_in_short_syllable('on', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    353
                 | 
                                    
                                                     | 
                
                 | 
                                                                   _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    354
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_ends_in_short_syllable('at', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    355
                 | 
                                    
                                                     | 
                
                 | 
                                                                   _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    356
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('uproot', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    357
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    358
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('uproot', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    359
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    360
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('bestow', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    361
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    362
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('disturb', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    363
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    364
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    365
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    366
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('d', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    367
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    368
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_ends_in_short_syllable('a', _vowels, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    369
                 | 
                                    
                                                     | 
                
                 | 
                                                                    _codanonvowels))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    370
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    371
                 | 
                                    
                                                     | 
                
                 | 
                    def test_sb_short_word(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    372
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer._sb_short_word."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    373
                 | 
                                    
                                                     | 
                
                 | 
                        _vowels = set('aeiouy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    374
                 | 
                                    
                                                     | 
                
                 | 
                        _codanonvowels = set('bcdfghjklmnpqrstvz\'') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    375
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    376
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_short_word('', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    377
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    378
                 | 
                                    
                                                     | 
                
                 | 
                        # examples from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    379
                 | 
                                    
                                                     | 
                
                 | 
                        # http://snowball.tartarus.org/algorithms/english/stemmer.html  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    380
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_short_word('bed', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    381
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_short_word('shed', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    382
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertTrue(_sb_short_word('shred', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    383
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_short_word('bead', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    384
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_short_word('embed', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    385
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertFalse(_sb_short_word('beds', _vowels, _codanonvowels)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    386
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    387
                 | 
                                    
                                                     | 
                
                 | 
                    def test_porter2(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    388
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.porter2."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    389
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    390
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    391
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    392
                 | 
                                    
                                                     | 
                
                 | 
                        # simple cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    393
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('c'), 'c') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    394
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('da'), 'da') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    395
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('ad'), 'ad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    396
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('sing'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    397
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('singing'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    398
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    399
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    400
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('capitalism'), 'capit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    401
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('fatalism'), 'fatal') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    402
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('dog\'s'), 'dog') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    403
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('A\'s\''), 'a') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    404
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('agreedly'), 'agre') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    405
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('feedly'), 'feed') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    406
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('stional'), 'stional') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    407
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('palism'), 'palism') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    408
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('sization'), 'sizat') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    409
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('licated'), 'licat') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    410
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('lical'), 'lical') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    411
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('clessly'), 'clessli') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    412
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('tably'), 'tabli') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    413
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('sizer'), 'sizer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    414
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('livity'), 'liviti') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    415
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    416
                 | 
                                    
                                                     | 
                
                 | 
                    def test_porter2_early_english(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    417
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.porter2 (early English)."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    418
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    419
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('', early_english=True), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    420
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    421
                 | 
                                    
                                                     | 
                
                 | 
                        # simple cases (no different from regular stemmer)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    422
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('c', early_english=True), 'c') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    423
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('da', early_english=True), 'da') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    424
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('ad', early_english=True), 'ad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    425
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('sing', early_english=True), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    426
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('singing', early_english=True), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    427
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    428
                 | 
                                    
                                                     | 
                
                 | 
                        # make  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    429
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('make', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    430
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('makes', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    431
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('maketh', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    432
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('makest', early_english=True), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    433
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    434
                 | 
                                    
                                                     | 
                
                 | 
                        # say  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    435
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('say', early_english=True), 'say') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    436
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('says', early_english=True), 'say') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    437
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('sayeth', early_english=True), 'say') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    438
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('sayest', early_english=True), 'say') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    439
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    440
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    441
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('best', early_english=True), 'best') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    442
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(porter2('meth', early_english=True), 'meth') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    443
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    444
                 | 
                                    
                                                     | 
                
                 | 
                    def test_porter2_snowball(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    445
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.porter2 (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    446
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    447
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    448
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/english/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    449
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    450
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Porter test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    451
                 | 
                                    
                                                     | 
                
                 | 
                        with open(TESTDIR+'/corpora/snowball_porter2.csv') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    452
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    453
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    454
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    455
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    456
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    457
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(porter2(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    458
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    459
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    460
                 | 
                                    
                                                     | 
                
                 | 
                class SnowballTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    461
                 | 
                                    
                                                     | 
                
                 | 
                    """Test Snowball functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    462
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    463
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.sb_german, abydos.stemmer.sb_dutch,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    464
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.sb_norwegian, abydos.stemmer.sb_swedish, &  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    465
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.sb_danish  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    466
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    467
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 
                    468
                 | 
                                    
                                                     | 
                
                View Code Duplication | 
                    def test_sb_german_snowball(self):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    469
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.sb_german (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    470
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    471
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    472
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/german/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    473
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    474
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    475
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    476
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    477
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball German test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    478
                 | 
                                    
                                                     | 
                
                 | 
                        with codecs.open(TESTDIR+'/corpora/snowball_german.csv',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    479
                 | 
                                    
                                                     | 
                
                 | 
                                         encoding='utf-8') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    480
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    481
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    482
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    483
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    484
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    485
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(sb_german(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    486
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    487
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    488
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('ikeit'), 'ikeit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    489
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    490
                 | 
                                    
                                                     | 
                
                 | 
                    def test_sb_german_snowball_alt(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    491
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.sb_german (alternate vowels)."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    492
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    493
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('', alternate_vowels=True), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    494
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    495
                 | 
                                    
                                                     | 
                
                 | 
                        # dämmerung,dammer  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    496
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('dämmerung', alternate_vowels=True), | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    497
                 | 
                                    
                                                     | 
                
                 | 
                                         'dammer')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    498
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('daemmerung', alternate_vowels=True), | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    499
                 | 
                                    
                                                     | 
                
                 | 
                                         'dammer')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    500
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('dämmerung'), 'dammer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    501
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('daemmerung'), 'daemmer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    502
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    503
                 | 
                                    
                                                     | 
                
                 | 
                        # brötchen,brotch  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    504
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('brötchen', alternate_vowels=True), | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    505
                 | 
                                    
                                                     | 
                
                 | 
                                         'brotch')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    506
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('broetchen', alternate_vowels=True), | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    507
                 | 
                                    
                                                     | 
                
                 | 
                                         'brotch')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    508
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('brötchen'), 'brotch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    509
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('broetchen'), 'broetch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    510
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    511
                 | 
                                    
                                                     | 
                
                 | 
                        # büro,buro  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    512
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('büro', alternate_vowels=True), 'buro') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    513
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('buero', alternate_vowels=True), 'buro') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    514
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('büro'), 'buro') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    515
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('buero'), 'buero') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    516
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    517
                 | 
                                    
                                                     | 
                
                 | 
                        # häufen,hauf  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    518
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('häufen', alternate_vowels=True), 'hauf') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    519
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('haeufen', alternate_vowels=True), 'hauf') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    520
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('häufen'), 'hauf') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    521
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('haeufen'), 'haeuf') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    522
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    523
                 | 
                                    
                                                     | 
                
                 | 
                        # quelle,quell  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    524
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('qülle', alternate_vowels=True), 'qull') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    525
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('quelle', alternate_vowels=True), 'quell') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    526
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('qülle'), 'qull') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    527
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('quelle'), 'quell') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    528
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    529
                 | 
                                    
                                                     | 
                
                 | 
                        # feuer,feuer  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    530
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('feür', alternate_vowels=True), 'feur') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    531
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('feuer', alternate_vowels=True), 'feu') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    532
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('feür'), 'feur') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    533
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('feuer'), 'feu') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    534
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    535
                 | 
                                    
                                                     | 
                
                 | 
                        # über,uber  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    536
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('über', alternate_vowels=True), 'uber') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    537
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('ueber', alternate_vowels=True), 'uber') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    538
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('über'), 'uber') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    539
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_german('ueber'), 'ueb') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    540
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 
                    541
                 | 
                                    
                                                     | 
                
                View Code Duplication | 
                    def test_sb_dutch_snowball(self):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    542
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.sb_dutch (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    543
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    544
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    545
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/dutch/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    546
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    547
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    548
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_dutch(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    549
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    550
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Dutch test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    551
                 | 
                                    
                                                     | 
                
                 | 
                        with codecs.open(TESTDIR+'/corpora/snowball_dutch.csv',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    552
                 | 
                                    
                                                     | 
                
                 | 
                                         encoding='utf-8') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    553
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    554
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    555
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    556
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    557
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    558
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(sb_dutch(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    559
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    560
                 | 
                                    
                                                     | 
                
                 | 
                        # missed branch test cases  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    561
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_dutch('zondulielijk'), 'zondulie') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    562
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 
                    563
                 | 
                                    
                                                     | 
                
                View Code Duplication | 
                    def test_sb_norwegian_snowball(self):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    564
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.sb_norwegian (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    565
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    566
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    567
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/norwegian/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    568
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    569
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    570
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_norwegian(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    571
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    572
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Norwegian test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    573
                 | 
                                    
                                                     | 
                
                 | 
                        with codecs.open(TESTDIR+'/corpora/snowball_norwegian.csv',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    574
                 | 
                                    
                                                     | 
                
                 | 
                                         encoding='utf-8') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    575
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    576
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    577
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    578
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    579
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    580
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(sb_norwegian(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    581
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 
                    582
                 | 
                                    
                                                     | 
                
                View Code Duplication | 
                    def test_sb_swedish_snowball(self):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    583
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.sb_swedish (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    584
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    585
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    586
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/swedish/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    587
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    588
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    589
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_swedish(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    590
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    591
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Swedish test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    592
                 | 
                                    
                                                     | 
                
                 | 
                        with codecs.open(TESTDIR+'/corpora/snowball_swedish.csv',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    593
                 | 
                                    
                                                     | 
                
                 | 
                                         encoding='utf-8') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    594
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    595
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    596
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    597
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    598
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    599
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(sb_swedish(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    600
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 
                    601
                 | 
                                    
                                                     | 
                
                View Code Duplication | 
                    def test_sb_danish_snowball(self):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    602
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.sb_danish (Snowball testset).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    603
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    604
                 | 
                                    
                                                     | 
                
                 | 
                        These test cases are from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    605
                 | 
                                    
                                                     | 
                
                 | 
                        http://snowball.tartarus.org/algorithms/danish/diffs.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    606
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    607
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    608
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(sb_danish(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    609
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    610
                 | 
                                    
                                                     | 
                
                 | 
                        #  Snowball Danish test set  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    611
                 | 
                                    
                                                     | 
                
                 | 
                        with codecs.open(TESTDIR+'/corpora/snowball_danish.csv',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    612
                 | 
                                    
                                                     | 
                
                 | 
                                         encoding='utf-8') as snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    613
                 | 
                                    
                                                     | 
                
                 | 
                            next(snowball_testset)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    614
                 | 
                                    
                                                     | 
                
                 | 
                            for line in snowball_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    615
                 | 
                                    
                                                     | 
                
                 | 
                                if line[0] != '#':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    616
                 | 
                                    
                                                     | 
                
                 | 
                                    line = line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    617
                 | 
                                    
                                                     | 
                
                 | 
                                    word, stem = line[0], line[1]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    618
                 | 
                                    
                                                     | 
                
                 | 
                                    self.assertEqual(sb_danish(word), stem.lower())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    619
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    620
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    621
                 | 
                                    
                                                     | 
                
                 | 
                class CLEFTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    622
                 | 
                                    
                                                     | 
                
                 | 
                    """Test CLEF functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    623
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    624
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.clef_german, abydos.stemmer.clef_german_plus, &  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    625
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.clef_swedish  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    626
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    627
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    628
                 | 
                                    
                                                     | 
                
                 | 
                    def test_clef_german(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    629
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.clef_german."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    630
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    631
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    632
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    633
                 | 
                                    
                                                     | 
                
                 | 
                        # len <= 2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    634
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('ä'), 'a') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    635
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('er'), 'er') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    636
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('es'), 'es') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    637
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('äh'), 'ah') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    638
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    639
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    640
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('deinen'), 'dein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    641
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('können'), 'konn') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    642
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Damen'), 'dame') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    643
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('kleines'), 'klein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    644
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Namen'), 'name') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    645
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Äpfel'), 'apfel') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    646
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Jahre'), 'jahr') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    647
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Mannes'), 'mann') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    648
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Häuser'), 'haus') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    649
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Motoren'), 'motor') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    650
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('kleine'), 'klein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    651
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('Pfingsten'), 'pfingst') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    652
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('lautest'), 'lautest') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    653
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('lauteste'), 'lautest') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    654
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('lautere'), 'lauter') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    655
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('lautste'), 'lautst') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    656
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german('kleinen'), 'klei') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    657
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    658
                 | 
                                    
                                                     | 
                
                 | 
                    def test_clef_german_plus(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    659
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.clef_german_plus."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    660
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    661
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    662
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    663
                 | 
                                    
                                                     | 
                
                 | 
                        # len <= 2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    664
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('ä'), 'a') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    665
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('er'), 'er') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    666
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('es'), 'es') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    667
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('äh'), 'ah') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    668
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    669
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    670
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('deinen'), 'dein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    671
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('können'), 'konn') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    672
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Damen'), 'dam') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    673
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('kleines'), 'klein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    674
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Namen'), 'nam') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    675
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Äpfel'), 'apfel') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    676
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Jahre'), 'jahr') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    677
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Mannes'), 'mann') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    678
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Häuser'), 'haus') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    679
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Motoren'), 'motor') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    680
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('kleine'), 'klein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    681
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Pfingsten'), 'pfing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    682
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('lautest'), 'laut') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    683
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('lauteste'), 'laut') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    684
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('lautere'), 'laut') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    685
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('lautste'), 'laut') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    686
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('kleinen'), 'klein') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    687
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_german_plus('Pfarrern'), 'pfarr') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    688
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    689
                 | 
                                    
                                                     | 
                
                 | 
                    def test_clef_swedish(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    690
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.clef_swedish."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    691
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    692
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    693
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    694
                 | 
                                    
                                                     | 
                
                 | 
                        # unstemmed  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    695
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('konung'), 'konung') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    696
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    697
                 | 
                                    
                                                     | 
                
                 | 
                        # len <= 3  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    698
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('km'), 'km') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    699
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('ja'), 'ja') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    700
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('de'), 'de') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    701
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('in'), 'in') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    702
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('a'), 'a') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    703
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('mer'), 'mer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    704
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('s'), 's') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    705
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('e'), 'e') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    706
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('oss'), 'oss') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    707
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('hos'), 'hos') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    708
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    709
                 | 
                                    
                                                     | 
                
                 | 
                        # genitive  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    710
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('svenskars'), 'svensk') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    711
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('stadens'), 'stad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    712
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('kommuns'), 'kommu') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    713
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('aftonbladets'), 'aftonblad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    714
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    715
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 7  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    716
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('fängelser'), 'fäng') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    717
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('möjligheten'), 'möjlig') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    718
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    719
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 6  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    720
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('svenskar'), 'svensk') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    721
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('myndigheterna'), 'myndighet') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    722
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('avgörande'), 'avgör') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    723
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('fängelse'), 'fäng') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    724
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('viktigaste'), 'viktig') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    725
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('kvinnorna'), 'kvinn') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    726
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('åklagaren'), 'åklag') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    727
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    728
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 5  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    729
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('tidigare'), 'tidig') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    730
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('senast'), 'sen') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    731
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('möjlighet'), 'möjlig') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    732
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    733
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 4  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    734
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('svenskar'), 'svensk') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    735
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('skriver'), 'skriv') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    736
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('människor'), 'människ') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    737
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('staden'), 'stad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    738
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('kunnat'), 'kunn') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    739
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('samarbete'), 'samarbe') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    740
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('aftonbladet'), 'aftonblad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    741
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    742
                 | 
                                    
                                                     | 
                
                 | 
                        # len > 3  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    743
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('allt'), 'all') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    744
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('vilka'), 'vilk') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    745
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('länge'), 'läng') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    746
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(clef_swedish('kommun'), 'kommu') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    747
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    748
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    749
                 | 
                                    
                                                     | 
                
                 | 
                class CaumannsTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    750
                 | 
                                    
                                                     | 
                
                 | 
                    """Test Caumanns functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    751
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    752
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.caumanns  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    753
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    754
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    755
                 | 
                                    
                                                     | 
                
                 | 
                    def test_caumanns(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    756
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.caumanns."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    757
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    758
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    759
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    760
                 | 
                                    
                                                     | 
                
                 | 
                        # tests from Caumanns' description of the algorithm  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    761
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('singt'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    762
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('singen'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    763
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('beliebt'), 'belieb') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    764
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('beliebtester'), 'belieb') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    765
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('stören'), 'stor') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    766
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('stöhnen'), 'stoh') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    767
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Kuß'), 'kuss') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    768
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Küsse'), 'kuss') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    769
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Verlierer'), 'verlier') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    770
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Verlies'), 'verlie') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    771
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Maus'), 'mau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    772
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Mauer'), 'mau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    773
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Störsender'), 'stor') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    774
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    775
                 | 
                                    
                                                     | 
                
                 | 
                        # additional tests to achieve full coverage  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    776
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Müllerinnen'), 'mullerin') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    777
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Matrix'), 'matrix') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    778
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Matrizen'), 'matrix') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    779
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    780
                 | 
                                    
                                                     | 
                
                 | 
                    def test_caumanns_lucene(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    781
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.caumanns (Lucene tests).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    782
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    783
                 | 
                                    
                                                     | 
                
                 | 
                        Based on tests from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    784
                 | 
                                    
                                                     | 
                
                 | 
                        https://svn.apache.org/repos/asf/lucene.net/trunk/test/contrib/Analyzers/De/data.txt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    785
                 | 
                                    
                                                     | 
                
                 | 
                        This is presumably Apache-licensed.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    786
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    787
                 | 
                                    
                                                     | 
                
                 | 
                        # German special characters are replaced:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    788
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('häufig'), 'haufig') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    789
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('üor'), 'uor') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    790
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('björk'), 'bjork') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    791
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    792
                 | 
                                    
                                                     | 
                
                 | 
                        # here the stemmer works okay, it maps related words to the same stem:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    793
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('abschließen'), 'abschliess') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    794
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('abschließender'), 'abschliess') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    795
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('abschließendes'), 'abschliess') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    796
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('abschließenden'), 'abschliess') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    797
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    798
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Tisch'), 'tisch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    799
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Tische'), 'tisch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    800
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Tischen'), 'tisch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    801
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('geheimtür'), 'geheimtur') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    802
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    803
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Haus'), 'hau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    804
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Hauses'), 'hau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    805
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Häuser'), 'hau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    806
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Häusern'), 'hau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    807
                 | 
                                    
                                                     | 
                
                 | 
                        # here's a case where overstemming occurs, i.e. a word is  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    808
                 | 
                                    
                                                     | 
                
                 | 
                        # mapped to the same stem as unrelated words:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    809
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('hauen'), 'hau') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    810
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    811
                 | 
                                    
                                                     | 
                
                 | 
                        # here's a case where understemming occurs, i.e. two related words  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    812
                 | 
                                    
                                                     | 
                
                 | 
                        # are not mapped to the same stem. This is the case with basically  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    813
                 | 
                                    
                                                     | 
                
                 | 
                        # all irregular forms:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    814
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Drama'), 'drama') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    815
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Dramen'), 'dram') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    816
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    817
                 | 
                                    
                                                     | 
                
                 | 
                        # replace "ß" with 'ss':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    818
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('Ausmaß'), 'ausmass') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    819
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    820
                 | 
                                    
                                                     | 
                
                 | 
                        # fake words to test if suffixes are cut off:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    821
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxe'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    822
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxs'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    823
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxn'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    824
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxt'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    825
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxem'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    826
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxer'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    827
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxnd'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    828
                 | 
                                    
                                                     | 
                
                 | 
                        # the suffixes are also removed when combined:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    829
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxxxetende'), 'xxxxx') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    830
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    831
                 | 
                                    
                                                     | 
                
                 | 
                        # words that are shorter than four charcters are not changed:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    832
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxe'), 'xxe') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    833
                 | 
                                    
                                                     | 
                
                 | 
                        # -em and -er are not removed from words shorter than five characters:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    834
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxem'), 'xxem') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    835
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxer'), 'xxer') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    836
                 | 
                                    
                                                     | 
                
                 | 
                        # -nd is not removed from words shorter than six characters:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    837
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(caumanns('xxxnd'), 'xxxnd') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    838
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    839
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    840
                 | 
                                    
                                                     | 
                
                 | 
                class UEALiteTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    841
                 | 
                                    
                                                     | 
                
                 | 
                    """Test UEA-lite functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    842
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    843
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.uealite  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    844
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    845
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    846
                 | 
                                    
                                                     | 
                
                 | 
                    def test_uealite(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    847
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.uealite."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    848
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    849
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    850
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    851
                 | 
                                    
                                                     | 
                
                 | 
                        # test cases copied from Ruby port  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    852
                 | 
                                    
                                                     | 
                
                 | 
                        # https://github.com/ealdent/uea-stemmer/blob/master/test/uea_stemmer_test.rb  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    853
                 | 
                                    
                                                     | 
                
                 | 
                        # These are corrected to match the Java version's output.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    854
                 | 
                                    
                                                     | 
                
                 | 
                        # stem base words to just the base word  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    855
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('man'), 'man') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    856
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('happiness'), 'happiness') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    857
                 | 
                                    
                                                     | 
                
                 | 
                        # stem theses as thesis but not bases as basis  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    858
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('theses'), 'thesis') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    859
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertNotEqual(uealite('bases'), 'basis') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    860
                 | 
                                    
                                                     | 
                
                 | 
                        # stem preterite words ending in -ed without the -ed  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    861
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('ordained'), 'ordain') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    862
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('killed'), 'kill') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    863
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('liked'), 'lik') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    864
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('helped'), 'help') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    865
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('scarred'), 'scarre') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    866
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('invited'), 'invit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    867
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('exited'), 'exit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    868
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('debited'), 'debit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    869
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('smited'), 'smit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    870
                 | 
                                    
                                                     | 
                
                 | 
                        # stem progressive verbs and gerunds without the -ing  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    871
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('running'), 'run') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    872
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('settings'), 'set') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    873
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('timing'), 'time') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    874
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('dying'), 'dy') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    875
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('harping'), 'harp') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    876
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('charring'), 'char') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    877
                 | 
                                    
                                                     | 
                
                 | 
                        # not stem false progressive verbs such as 'sing'  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    878
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('ring'), 'ring') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    879
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('sing'), 'se') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    880
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('bring'), 'br') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    881
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('fling'), 'fle') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    882
                 | 
                                    
                                                     | 
                
                 | 
                        # stem various plural nouns and 3rd-pres verbs without the -s/-es  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    883
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('changes'), 'change') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    884
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('deaths'), 'death') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    885
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('shadows'), 'shadow') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    886
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('flies'), 'fly') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    887
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('things'), 'thing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    888
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('nothings'), 'nothing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    889
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('witches'), 'witch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    890
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('makes'), 'mak') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    891
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('smokes'), 'smok') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    892
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('does'), 'do') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    893
                 | 
                                    
                                                     | 
                
                 | 
                        # stem various words with -des suffix  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    894
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('abodes'), 'abod') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    895
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('escapades'), 'escapad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    896
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('crusades'), 'crusad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    897
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('grades'), 'grad') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    898
                 | 
                                    
                                                     | 
                
                 | 
                        # stem various words with -res suffix  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    899
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('wires'), 'wir') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    900
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('acres'), 'acr') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    901
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('fires'), 'fir') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    902
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('cares'), 'car') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    903
                 | 
                                    
                                                     | 
                
                 | 
                        # stem acronyms when pluralized otherwise they should be left alone  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    904
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('USA'), 'USA') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    905
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('FLOSS'), 'FLOSS') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    906
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('MREs'), 'MRE') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    907
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('USAED'), 'USAED') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    908
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    909
                 | 
                                    
                                                     | 
                
                 | 
                        # test cases copied from Ruby port  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    910
                 | 
                                    
                                                     | 
                
                 | 
                        # https://github.com/ealdent/uea-stemmer/blob/master/test/uea_stemmer_test.rb  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    911
                 | 
                                    
                                                     | 
                
                 | 
                        # stem base words to just the base word  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    912
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('man', var='Adams'), 'man') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    913
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('happiness', var='Adams'), 'happiness') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    914
                 | 
                                    
                                                     | 
                
                 | 
                        # stem theses as thesis but not bases as basis  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    915
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('theses', var='Adams'), 'thesis') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    916
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertNotEqual(uealite('bases', var='Adams'), 'basis') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    917
                 | 
                                    
                                                     | 
                
                 | 
                        # stem preterite words ending in -ed without the -ed  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    918
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('ordained', var='Adams'), 'ordain') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    919
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('killed', var='Adams'), 'kill') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    920
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('liked', var='Adams'), 'like') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    921
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('helped', var='Adams'), 'help') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    922
                 | 
                                    
                                                     | 
                
                 | 
                        # self.assertEqual(uealite('scarred', var='Adams'), 'scar') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    923
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('invited', var='Adams'), 'invite') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    924
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('exited', var='Adams'), 'exit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    925
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('debited', var='Adams'), 'debit') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    926
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('smited', var='Adams'), 'smite') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    927
                 | 
                                    
                                                     | 
                
                 | 
                        # stem progressive verbs and gerunds without the -ing  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    928
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('running', var='Adams'), 'run') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    929
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('settings', var='Adams'), 'set') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    930
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('timing', var='Adams'), 'time') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    931
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('dying', var='Adams'), 'die') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    932
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('harping', var='Adams'), 'harp') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    933
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('charring', var='Adams'), 'char') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    934
                 | 
                                    
                                                     | 
                
                 | 
                        # not stem false progressive verbs such as 'sing'  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    935
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('ring', var='Adams'), 'ring') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    936
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('sing', var='Adams'), 'sing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    937
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('ring', var='Adams'), 'ring') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    938
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('bring', var='Adams'), 'bring') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    939
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('fling', var='Adams'), 'fling') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    940
                 | 
                                    
                                                     | 
                
                 | 
                        # stem various plural nouns and 3rd-pres verbs without the -s/-es  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    941
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('changes', var='Adams'), 'change') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    942
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('deaths', var='Adams'), 'death') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    943
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('shadows', var='Adams'), 'shadow') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    944
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('flies', var='Adams'), 'fly') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    945
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('things', var='Adams'), 'thing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    946
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('nothings', var='Adams'), 'nothing') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    947
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('witches', var='Adams'), 'witch') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    948
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('makes', var='Adams'), 'make') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    949
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('smokes', var='Adams'), 'smoke') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    950
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('does', var='Adams'), 'do') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    951
                 | 
                                    
                                                     | 
                
                 | 
                        # stem various words with -des suffix  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    952
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('abodes', var='Adams'), 'abode') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    953
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('escapades', var='Adams'), 'escapade') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    954
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('crusades', var='Adams'), 'crusade') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    955
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('grades', var='Adams'), 'grade') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    956
                 | 
                                    
                                                     | 
                
                 | 
                        # stem various words with -res suffix  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    957
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('wires', var='Adams'), 'wire') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    958
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('acres', var='Adams'), 'acre') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    959
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('fires', var='Adams'), 'fire') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    960
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('cares', var='Adams'), 'care') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    961
                 | 
                                    
                                                     | 
                
                 | 
                        # stem acronyms when pluralized otherwise they should be left alone  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    962
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('USA', var='Adams'), 'USA') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    963
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('FLOSS', var='Adams'), 'FLOSS') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    964
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('MREs', var='Adams'), 'MRE') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    965
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(uealite('USAED', var='Adams'), 'USAED') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    966
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    967
                 | 
                                    
                                                     | 
                
                 | 
                    def test_uealite_wsj_set(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    968
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.uealite (WSJ testset)."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    969
                 | 
                                    
                                                     | 
                
                 | 
                        with open(TESTDIR + '/corpora/uea-lite_wsj.csv') as wsj_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    970
                 | 
                                    
                                                     | 
                
                 | 
                            for wsj_line in wsj_testset:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    971
                 | 
                                    
                                                     | 
                
                 | 
                                (word, uea, rule) = wsj_line.strip().split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    972
                 | 
                                    
                                                     | 
                
                 | 
                                self.assertEqual(uealite(word, return_rule_no=True),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    973
                 | 
                                    
                                                     | 
                
                 | 
                                                 (uea, float(rule)))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    974
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    975
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    976
                 | 
                                    
                                                     | 
                
                 | 
                class PaiceHuskTestCases(unittest.TestCase):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    977
                 | 
                                    
                                                     | 
                
                 | 
                    """Test Paice-Husk functions.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    978
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    979
                 | 
                                    
                                                     | 
                
                 | 
                    abydos.stemmer.paice_husk  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    980
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    981
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    982
                 | 
                                    
                                                     | 
                
                 | 
                    def test_paice_husk(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    983
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.paice_husk."""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    984
                 | 
                                    
                                                     | 
                
                 | 
                        # base case  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    985
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk(''), '') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    986
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    987
                 | 
                                    
                                                     | 
                
                 | 
                        # cases copied from  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    988
                 | 
                                    
                                                     | 
                
                 | 
                        # https://doi.org/10.1145/101306.101310  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    989
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('maximum'), 'maxim') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    990
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('presumably'), 'presum') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    991
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('multiply'), 'multiply') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    992
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('provision'), 'provid') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    993
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('owed'), 'ow') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    994
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('owing'), 'ow') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    995
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('ear'), 'ear') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    996
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('saying'), 'say') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    997
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('crying'), 'cry') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    998
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('string'), 'string') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    999
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('meant'), 'meant') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1000
                 | 
                                    
                                                     | 
                
                 | 
                        self.assertEqual(paice_husk('cement'), 'cem') | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    1001
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1002
                 | 
                                    
                                                     | 
                
                 | 
                    def test_paice_husk_hopper_set(self):  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1003
                 | 
                                    
                                                     | 
                
                 | 
                        """Test abydos.stemmer.paice_husk (Hopper262 testset).  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1004
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1005
                 | 
                                    
                                                     | 
                
                 | 
                        Source:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1006
                 | 
                                    
                                                     | 
                
                 | 
                        https://raw.githubusercontent.com/Hopper262/paice-husk-stemmer/master/wordlist.txt  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1007
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1008
                 | 
                                    
                                                     | 
                
                 | 
                        The only correction made from stemmed values in the Hopper262 set/  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1009
                 | 
                                    
                                                     | 
                
                 | 
                        implementations were:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1010
                 | 
                                    
                                                     | 
                
                 | 
                         - ymca : ymc -> ymca  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1011
                 | 
                                    
                                                     | 
                
                 | 
                         - yttrium : yttr -> yttri  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1012
                 | 
                                    
                                                     | 
                
                 | 
                         - ywca : ywc -> ywca  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1013
                 | 
                                    
                                                     | 
                
                 | 
                        The Pascal reference implementation does not consider 'y' in initial  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1014
                 | 
                                    
                                                     | 
                
                 | 
                        position to be a vowel.  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1015
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1016
                 | 
                                    
                                                     | 
                
                 | 
                        with open(TESTDIR + '/corpora/paicehusk.csv') as hopper_testset:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1017
                 | 
                                    
                                                     | 
                
                 | 
                            for hopper_line in hopper_testset:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1018
                 | 
                                    
                                                     | 
                
                 | 
                                (word, stem) = hopper_line.strip().split(',') | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    1019
                 | 
                                    
                                                     | 
                
                 | 
                                self.assertEqual(paice_husk(word), stem)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1020
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1021
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1022
                 | 
                                    
                                                     | 
                
                 | 
                if __name__ == '__main__':  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    1023
                 | 
                                    
                                                     | 
                
                 | 
                    unittest.main()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    1024
                 | 
                                    
                                                     | 
                
                 | 
                 |