Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.stemmer.test_stemmer_clef   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 161
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 94
dl 0
loc 161
rs 10
c 0
b 0
f 0
wmc 3
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_stemmer_clef.
20
21
This module contains unit tests for abydos.stemmer.clef.
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
from abydos.stemmer.clef import clef_german, clef_german_plus, clef_swedish
29
30
31
class CLEFTestCases(unittest.TestCase):
    """Exercise the CLEF stemmers.

    Covers clef_german, clef_german_plus, and clef_swedish, all of which
    are imported from abydos.stemmer.clef.
    """

    def test_clef_german(self):
        """Check clef_german on empty, short, and longer inputs."""
        # Each entry is (input word, expected stem); order is significant
        # only in that the first mismatch aborts the test.
        expectations = (
            # base case
            ('', ''),
            # len <= 2
            ('ä', 'a'),
            ('er', 'er'),
            ('es', 'es'),
            ('äh', 'ah'),
            # len > 2
            ('deinen', 'dein'),
            ('können', 'konn'),
            ('Damen', 'dame'),
            ('kleines', 'klein'),
            ('Namen', 'name'),
            ('Äpfel', 'apfel'),
            ('Jahre', 'jahr'),
            ('Mannes', 'mann'),
            ('Häuser', 'haus'),
            ('Motoren', 'motor'),
            ('kleine', 'klein'),
            ('Pfingsten', 'pfingst'),
            ('lautest', 'lautest'),
            ('lauteste', 'lautest'),
            ('lautere', 'lauter'),
            ('lautste', 'lautst'),
            ('kleinen', 'klei'),
        )
        for word, stem in expectations:
            self.assertEqual(clef_german(word), stem)

    def test_clef_german_plus(self):
        """Check clef_german_plus on empty, short, and longer inputs."""
        # Each entry is (input word, expected stem).
        expectations = (
            # base case
            ('', ''),
            # len <= 2
            ('ä', 'a'),
            ('er', 'er'),
            ('es', 'es'),
            ('äh', 'ah'),
            # len > 2
            ('deinen', 'dein'),
            ('können', 'konn'),
            ('Damen', 'dam'),
            ('kleines', 'klein'),
            ('Namen', 'nam'),
            ('Äpfel', 'apfel'),
            ('Jahre', 'jahr'),
            ('Mannes', 'mann'),
            ('Häuser', 'haus'),
            ('Motoren', 'motor'),
            ('kleine', 'klein'),
            ('Pfingsten', 'pfing'),
            ('lautest', 'laut'),
            ('lauteste', 'laut'),
            ('lautere', 'laut'),
            ('lautste', 'laut'),
            ('kleinen', 'klein'),
            ('Pfarrern', 'pfarr'),
        )
        for word, stem in expectations:
            self.assertEqual(clef_german_plus(word), stem)

    def test_clef_swedish(self):
        """Check clef_swedish across the word-length groups listed below."""
        # Each entry is (input word, expected stem).
        expectations = (
            # base case
            ('', ''),
            # unstemmed
            ('konung', 'konung'),
            # len <= 3
            ('km', 'km'),
            ('ja', 'ja'),
            ('de', 'de'),
            ('in', 'in'),
            ('a', 'a'),
            ('mer', 'mer'),
            ('s', 's'),
            ('e', 'e'),
            ('oss', 'oss'),
            ('hos', 'hos'),
            # genitive
            ('svenskars', 'svensk'),
            ('stadens', 'stad'),
            ('kommuns', 'kommu'),
            ('aftonbladets', 'aftonblad'),
            # len > 7
            ('fängelser', 'fäng'),
            ('möjligheten', 'möjlig'),
            # len > 6
            ('svenskar', 'svensk'),
            ('myndigheterna', 'myndighet'),
            ('avgörande', 'avgör'),
            ('fängelse', 'fäng'),
            ('viktigaste', 'viktig'),
            ('kvinnorna', 'kvinn'),
            ('åklagaren', 'åklag'),
            # len > 5
            ('tidigare', 'tidig'),
            ('senast', 'sen'),
            ('möjlighet', 'möjlig'),
            # len > 4
            ('svenskar', 'svensk'),
            ('skriver', 'skriv'),
            ('människor', 'människ'),
            ('staden', 'stad'),
            ('kunnat', 'kunn'),
            ('samarbete', 'samarbe'),
            ('aftonbladet', 'aftonblad'),
            # len > 3
            ('allt', 'all'),
            ('vilka', 'vilk'),
            ('länge', 'läng'),
            ('kommun', 'kommu'),
        )
        for word, stem in expectations:
            self.assertEqual(clef_swedish(word), stem)
157
158
159
if __name__ == '__main__':
    # Executed directly as a script: hand control to unittest's runner.
    unittest.main()
161