Completed
Branch master (87ccc1)
by Chris
10:18
created

tests.phonetic.test_phonetic_phonet   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 146
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 92
dl 0
loc 146
rs 10
c 0
b 0
f 0
wmc 16
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_phonetic_phonet.
20
21
This module contains unit tests for abydos.phonetic.phonet
22
"""
23
24
from __future__ import unicode_literals
25
26
import codecs
27
import unittest
28
29
from abydos.phonetic.phonet import phonet
30
31
from .. import ALLOW_RANDOM, _corpus_file, _one_in
32
33
34
class PhonetTestCases(unittest.TestCase):
    """Test Phonet functions.

    test cases for abydos.phonetic.phonet
    """

    def test_phonet_german(self):
        """Test abydos.phonetic.phonet (German)."""
        self.assertEqual(phonet(''), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        # Cases called with 1 as the second positional argument.
        self.assertEqual(phonet('', 1), '')
        self.assertEqual(phonet('Zedlitz', 1), 'ZETLIZ')
        self.assertEqual(phonet('Bremerhaven', 1), 'BREMAHAFN')
        self.assertEqual(phonet('Hamburger Hafen', 1), 'HAMBURGA HAFN')
        self.assertEqual(phonet('Jesper', 1), 'IESPA')
        self.assertEqual(phonet('elisabeth', 1), 'ELISABET')
        self.assertEqual(phonet('elisabet', 1), 'ELISABET')
        self.assertEqual(phonet('Ziegler', 1), 'ZIKLA')
        self.assertEqual(phonet('Scherer', 1), 'SHERA')
        self.assertEqual(phonet('Bartels', 1), 'BARTLS')
        self.assertEqual(phonet('Jansen', 1), 'IANSN')
        self.assertEqual(phonet('Sievers', 1), 'SIWAS')
        self.assertEqual(phonet('Michels', 1), 'MICHLS')
        self.assertEqual(phonet('Ewers', 1), 'EWERS')
        self.assertEqual(phonet('Evers', 1), 'EWERS')
        self.assertEqual(phonet('Wessels', 1), 'WESLS')
        self.assertEqual(phonet('Gottschalk', 1), 'GOSHALK')
        self.assertEqual(phonet('Brückmann', 1), 'BRÜKMAN')
        self.assertEqual(phonet('Blechschmidt', 1), 'BLECHSHMIT')
        self.assertEqual(phonet('Kolodziej', 1), 'KOLOTZI')
        self.assertEqual(phonet('Krauße', 1), 'KRAUSE')
        self.assertEqual(phonet('Cachel', 1), 'KESHL')

        # Cases called with 2 as the second positional argument.
        self.assertEqual(phonet('', 2), '')
        self.assertEqual(phonet('Zedlitz', 2), 'ZETLIZ')
        self.assertEqual(phonet('Bremerhaven', 2), 'BRENAFN')
        self.assertEqual(phonet('Schönberg', 2), 'ZÖNBAK')
        self.assertEqual(phonet('Hamburger Hafen', 2), 'ANBURKA AFN')
        self.assertEqual(phonet('Ziegler', 2), 'ZIKLA')
        self.assertEqual(phonet('Scherer', 2), 'ZERA')
        self.assertEqual(phonet('Jansen', 2), 'IANZN')
        self.assertEqual(phonet('Eberhardt', 2), 'EBART')
        self.assertEqual(phonet('Gottschalk', 2), 'KUZALK')
        self.assertEqual(phonet('Brückmann', 2), 'BRIKNAN')
        self.assertEqual(phonet('Blechschmidt', 2), 'BLEKZNIT')
        self.assertEqual(phonet('Kolodziej', 2), 'KULUTZI')
        self.assertEqual(phonet('Krauße', 2), 'KRAUZE')

        # etc. (for code coverage)
        self.assertEqual(phonet('Jesper', 1), 'IESPA')
        self.assertEqual(phonet('Glacéhandschuh', 1), 'GLAZANSHU')
        self.assertEqual(phonet('Blechschmidt', 1), 'BLECHSHMIT')
        self.assertEqual(phonet('Burgdorf', 1), 'BURKDORF')
        self.assertEqual(phonet('Holzschuh', 1), 'HOLSHU')
        self.assertEqual(phonet('Aachen', 1), 'ACHN')
        self.assertEqual(phonet('Abendspaziergang', 1), 'ABENTSPAZIRGANK')

    def test_phonet_nolang(self):
        """Test abydos.phonetic.phonet (no language)."""
        self.assertEqual(phonet('', lang='none'), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        # Cases called with 1 as the second positional argument.
        self.assertEqual(phonet('', 1, 'none'), '')
        self.assertEqual(phonet('Zedlitz', 1, 'none'), 'ZEDLITZ')
        self.assertEqual(phonet('Bremerhaven', 1, 'none'), 'BREMERHAVEN')
        self.assertEqual(phonet('Schönberg', 1, 'none'), 'SCHOENBERG')
        self.assertEqual(phonet('Brückmann', 1, 'none'), 'BRUECKMAN')
        self.assertEqual(phonet('Krauße', 1, 'none'), 'KRAUSE')

        # Cases called with 2 as the second positional argument.
        self.assertEqual(phonet('', 2, 'none'), '')
        self.assertEqual(phonet('Zedlitz', 2, 'none'), 'ZEDLITZ')
        self.assertEqual(phonet('Bremerhaven', 2, 'none'), 'BREMERHAVEN')
        self.assertEqual(phonet('Schönberg', 2, 'none'), 'SCHOENBERG')
        self.assertEqual(phonet('Brückmann', 2, 'none'), 'BRUECKMAN')
        self.assertEqual(phonet('Krauße', 2, 'none'), 'KRAUSE')

    @staticmethod
    def _parse_corpus_line(line):
        """Split one corpus line into a ``(term, ph1, ph2)`` tuple.

        :param line: one raw line read from a comma-separated corpus file
        :returns: the first three comma-separated fields of the stripped
            line as a tuple, or None when the line starts with ``#`` or
            has fewer than three fields
        """
        if line.startswith('#'):
            return None
        fields = line.strip().split(',')
        if len(fields) < 3:
            return None
        # Only the first three fields are used; slicing keeps a row with
        # extra columns from breaking the three-way unpack in the caller.
        return tuple(fields[:3])

    def _check_corpus_sample(self, filename, odds):
        """Compare phonet output against a random sample of a corpus file.

        Each usable row supplies a term plus the values expected from
        ``phonet(term, 1)`` and ``phonet(term, 2)``. A row is checked only
        when ``_one_in(odds)`` returns a true value.

        :param filename: corpus file name, resolved through _corpus_file
        :param odds: value passed to _one_in for every usable row
        """
        if not ALLOW_RANDOM:
            # Report the test as skipped rather than silently passing.
            self.skipTest('random corpus sampling is disabled')
        with codecs.open(_corpus_file(filename),
                         encoding='utf-8') as corpus:
            for line in corpus:
                row = self._parse_corpus_line(line)
                # _one_in is consulted only for usable rows.
                if row is not None and _one_in(odds):
                    term, ph1, ph2 = row
                    self.assertEqual(phonet(term, 1), ph1)
                    self.assertEqual(phonet(term, 2), ph2)

    def test_phonet_nachnamen(self):
        """Test abydos.phonetic.phonet (Nachnamen set)."""
        # This test set is very large (~10000 entries)
        # so let's just randomly select about 100 for testing
        self._check_corpus_sample('nachnamen.csv', 100)

    def test_phonet_ngerman(self):
        """Test abydos.phonetic.phonet (ngerman set)."""
        # This test set is very large, so only about one usable row in
        # 10000 is randomly selected for testing.
        self._check_corpus_sample('ngerman.csv', 10000)
142
143
144
if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()
146