Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.stemmer.test_stemmer_uealite.UEALiteTestCases.test_uealite()   B

Complexity

Conditions 1

Size

Total Lines 143
Code Lines 111

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 111
nop 1
dl 0
loc 143
rs 7
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

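Commonly applied refactorings include: Extract Method and, when many parameters or temporary variables are present, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.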

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_stemmer_uealite.
20
21
This module contains unit tests for abydos.stemmer.uealite
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
from abydos.stemmer.uealite import uealite
29
30
from .. import _corpus_file
31
32
33
class UEALiteTestCases(unittest.TestCase):
    """Test UEA-lite functions.

    abydos.stemmer.uealite
    """

    # Append the per-case message to unittest's own diff instead of replacing
    # it, so a failure inside a table-driven loop still names the word.
    longMessage = True

    # (word, expected stem) pairs for the default variant.
    # Test cases copied from the Ruby port:
    # https://github.com/ealdent/uea-stemmer/blob/master/test/uea_stemmer_test.rb
    # These are corrected to match the Java version's output.
    _DEFAULT_CASES = (
        # stem base words to just the base word
        ('man', 'man'),
        ('happiness', 'happiness'),
        # stem theses as thesis
        ('theses', 'thesis'),
        # stem preterite words ending in -ed without the -ed
        ('ordained', 'ordain'),
        ('killed', 'kill'),
        ('liked', 'lik'),
        ('helped', 'help'),
        ('scarred', 'scarre'),
        ('invited', 'invit'),
        ('exited', 'exit'),
        ('debited', 'debit'),
        ('smited', 'smit'),
        # stem progressive verbs and gerunds without the -ing
        ('running', 'run'),
        ('settings', 'set'),
        ('timing', 'time'),
        ('dying', 'dy'),
        ('harping', 'harp'),
        ('charring', 'char'),
        # false progressive verbs such as 'sing' (the expected values here
        # show that only 'ring' comes back unchanged in this variant)
        ('ring', 'ring'),
        ('sing', 'se'),
        ('bring', 'br'),
        ('fling', 'fle'),
        # stem various plural nouns and 3rd-pres verbs without the -s/-es
        ('changes', 'change'),
        ('deaths', 'death'),
        ('shadows', 'shadow'),
        ('flies', 'fly'),
        ('things', 'thing'),
        ('nothings', 'nothing'),
        ('witches', 'witch'),
        ('makes', 'mak'),
        ('smokes', 'smok'),
        ('does', 'do'),
        # stem various words with -des suffix
        ('abodes', 'abod'),
        ('escapades', 'escapad'),
        ('crusades', 'crusad'),
        ('grades', 'grad'),
        # stem various words with -res suffix
        ('wires', 'wir'),
        ('acres', 'acr'),
        ('fires', 'fir'),
        ('cares', 'car'),
        # stem acronyms when pluralized otherwise they should be left alone
        ('USA', 'USA'),
        ('FLOSS', 'FLOSS'),
        ('MREs', 'MRE'),
        ('USAED', 'USAED'),
    )

    # (word, expected stem) pairs for var='Adams'.
    # Test cases copied from the Ruby port:
    # https://github.com/ealdent/uea-stemmer/blob/master/test/uea_stemmer_test.rb
    _ADAMS_CASES = (
        # stem base words to just the base word
        ('man', 'man'),
        ('happiness', 'happiness'),
        # stem theses as thesis
        ('theses', 'thesis'),
        # stem preterite words ending in -ed without the -ed
        ('ordained', 'ordain'),
        ('killed', 'kill'),
        ('liked', 'like'),
        ('helped', 'help'),
        # NOTE(review): the ('scarred', 'scar') case was commented out in the
        # original test and is therefore not asserted here -- TODO confirm
        # whether the Adams variant is expected to produce 'scar'.
        ('invited', 'invite'),
        ('exited', 'exit'),
        ('debited', 'debit'),
        ('smited', 'smite'),
        # stem progressive verbs and gerunds without the -ing
        ('running', 'run'),
        ('settings', 'set'),
        ('timing', 'time'),
        ('dying', 'die'),
        ('harping', 'harp'),
        ('charring', 'char'),
        # not stem false progressive verbs such as 'sing'
        ('ring', 'ring'),
        ('sing', 'sing'),
        ('bring', 'bring'),
        ('fling', 'fling'),
        # stem various plural nouns and 3rd-pres verbs without the -s/-es
        ('changes', 'change'),
        ('deaths', 'death'),
        ('shadows', 'shadow'),
        ('flies', 'fly'),
        ('things', 'thing'),
        ('nothings', 'nothing'),
        ('witches', 'witch'),
        ('makes', 'make'),
        ('smokes', 'smoke'),
        ('does', 'do'),
        # stem various words with -des suffix
        ('abodes', 'abode'),
        ('escapades', 'escapade'),
        ('crusades', 'crusade'),
        ('grades', 'grade'),
        # stem various words with -res suffix
        ('wires', 'wire'),
        ('acres', 'acre'),
        ('fires', 'fire'),
        ('cares', 'care'),
        # stem acronyms when pluralized otherwise they should be left alone
        ('USA', 'USA'),
        ('FLOSS', 'FLOSS'),
        ('MREs', 'MRE'),
        ('USAED', 'USAED'),
    )

    # Inputs for complete coverage that are expected to come back unchanged
    # when uealite is called with default arguments.
    _UNCHANGED_CASES = (
        'was',
        'during',
        '10',
        'top-ten',
        'top-10',
        'top_ten',
        'abcDefGhij',
        'Tophat',
    )

    def _assert_stems(self, cases, **kwargs):
        """Assert that uealite maps each word in cases to its paired stem.

        :param cases: iterable of (word, expected stem) pairs
        :param kwargs: keyword arguments forwarded unchanged to uealite
        """
        for word, expected in cases:
            self.assertEqual(uealite(word, **kwargs), expected,
                             'uealite({!r}, **{!r})'.format(word, kwargs))

    def _check_length_limits(self):
        """Check inputs that exceed max_word_length / max_acro_length.

        Each of these inputs is expected to be returned unchanged.
        """
        self.assertEqual(uealite('abcdefghijklmnopqrstuvwxyz',
                                 max_word_length=20),
                         'abcdefghijklmnopqrstuvwxyz')
        self.assertEqual(uealite('ABCDEFGHIJKLMs', max_acro_length=8,
                                 var='Adams'), 'ABCDEFGHIJKLMs')
        self.assertEqual(uealite('ABCDEFGHIJKLM', max_acro_length=8,
                                 var='Adams'), 'ABCDEFGHIJKLM')

    def test_uealite(self):
        """Test abydos.stemmer.uealite."""
        # base case
        self.assertEqual(uealite(''), '')

        # default variant; 'bases' must not be stemmed like 'theses'
        self._assert_stems(self._DEFAULT_CASES)
        self.assertNotEqual(uealite('bases'), 'basis')

        # Adams variant; 'bases' must not be stemmed like 'theses'
        self._assert_stems(self._ADAMS_CASES, var='Adams')
        self.assertNotEqual(uealite('bases', var='Adams'), 'basis')

        # Perl version tests
        self.assertEqual(uealite('ragings'), 'rage')
        self.assertEqual(uealite('ragings', var='Perl'), 'rag')

        # complete coverage
        self._assert_stems((word, word) for word in self._UNCHANGED_CASES)
        self._check_length_limits()

    def test_uealite_wsj_set(self):
        """Test abydos.stemmer.uealite (WSJ testset).

        Each non-blank line of the corpus file is split on commas into a
        word, its expected stem, and the expected rule number, and compared
        with the (stem, rule) tuple returned for return_rule_no=True.
        """
        with open(_corpus_file('uea-lite_wsj.csv')) as wsj_ts:
            for wsj_line in wsj_ts:
                wsj_line = wsj_line.strip()
                if not wsj_line:
                    # A blank (e.g. trailing) line carries no test case and
                    # would otherwise fail the three-way unpacking below.
                    continue
                (word, uea, rule) = wsj_line.split(',')
                self.assertEqual(uealite(word, return_rule_no=True),
                                 (uea, float(rule)))
191
192
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
194