Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.phonetic.test_phonetic_fr   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 246
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 193
dl 0
loc 246
rs 10
c 0
b 0
f 0
wmc 3
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_phonetic_fr.
20
21
This module contains unit tests for abydos.phonetic.fr
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
from abydos.phonetic.fr import fonem, henry_early
29
30
31
class FonemTestCases(unittest.TestCase):
32
    """Test FONEM functions.
33
34
    test cases for abydos.phonetic.fonem
35
    """
36
37
    def test_fonem(self):
38
        """Test abydos.phonetic.fonem."""
39
        # Base cases
40
        self.assertEqual(fonem(''), '')
41
42
        # Test cases, mostly from the FONEM specification,
43
        # but copied from Talisman:
44
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/french/fonem.js
45
        test_cases = (
46
            ('BEAULAC', 'BOLAK'),
47
            ('BAULAC', 'BOLAK'),
48
            ('IMBEAULT', 'INBO'),
49
            ('DUFAUT', 'DUFO'),
50
            ('THIBOUTOT', 'TIBOUTOT'),
51
            ('DEVAUX', 'DEVO'),
52
            ('RONDEAUX', 'RONDO'),
53
            ('BOURGAULX', 'BOURGO'),
54
            ('PINCHAUD', 'PINCHO'),
55
            ('PEDNAULD', 'PEDNO'),
56
            ('MAZENOD', 'MASENOD'),
57
            ('ARNOLD', 'ARNOL'),
58
            ('BERTOLD', 'BERTOL'),
59
            ('BELLAY', 'BELE'),
60
            ('SANDAY', 'SENDE'),
61
            ('GAY', 'GAI'),
62
            ('FAYARD', 'FAYAR'),
63
            ('LEMIEUX', 'LEMIEU'),
64
            ('LHEUREUX', 'LEUREU'),
65
            ('BELLEY', 'BELE'),
66
            ('WELLEY', 'WELE'),
67
            ('MEYER', 'MEYER'),
68
            ('BOILY', 'BOILI'),
69
            ('LOYSEAU', 'LOISO'),
70
            ('MAYRAND', 'MAIREN'),
71
            ('GUYON', 'GUYON'),
72
            ('FAILLARD', 'FAYAR'),
73
            ('FAIARD', 'FAYAR'),
74
            ('MEIER', 'MEYER'),
75
            ('MEILLER', 'MEYER'),
76
            ('GUILLON', 'GUYON'),
77
            ('LAVILLE', 'LAVILLE'),
78
            ('COUET', 'CWET'),
79
            ('EDOUARD', 'EDWAR'),
80
            ('GIROUARD', 'JIRWAR'),
81
            ('OZOUADE', 'OSWADE'),  # differs from test set
82
            ('BOUILLE', 'BOUYE'),
83
            ('POUYEZ', 'POUYES'),  # differs from test set
84
            ('LEMEE', 'LEME'),
85
            ('ABRAAM', 'ABRAM'),
86
            ('ARCHEMBAULT', 'ARCHENBO'),
87
            ('AMTHIME', 'ENTIME'),
88
            ('ROMPRE', 'RONPRE'),
89
            ('BOMSECOURS', 'BONSECOURS'),
90
            ('BOULANGER', 'BOULENJER'),
91
            ('TANCREDE', 'TENKREDE'),
92
            ('BLAIN', 'BLIN'),
93
            ('BLAINVILLE', 'BLINVILLE'),
94
            ('MAINARD', 'MAINAR'),
95
            ('RAIMOND', 'RAIMON'),
96
            ('BLACKBORN', 'BLAKBURN'),
97
            ('SEABOURNE', 'SEABURN'),
98
            ('IMBO', 'INBO'),
99
            ('RIMFRET', 'RINFRET'),
100
            ('LEFEBVRE', 'LEFEVRE'),
101
            ('MACE', 'MASSE'),
102
            ('MACON', 'MACON'),
103
            ('MARCELIN', 'MARSELIN'),
104
            ('MARCEAU', 'MARSO'),
105
            ('VINCELETTE', 'VINSELETE'),
106
            ('FORCADE', 'FORCADE'),
107
            ('CELINE', 'SELINE'),
108
            ('CERAPHIN', 'SERAFIN'),
109
            ('CAMILLE', 'KAMILLE'),
110
            ('CAYETTE', 'KAYETE'),
111
            ('CARINE', 'KARINE'),
112
            ('LUC', 'LUK'),
113
            ('LEBLANC', 'LEBLEN'),
114
            ('VICTOR', 'VIKTOR'),
115
            ('LACCOULINE', 'LAKOULINE'),
116
            ('MACCIMILIEN', 'MAXIMILIEN'),
117
            ('MAGELLA', 'MAJELA'),
118
            ('GINETTE', 'JINETE'),
119
            ('GANDET', 'GANDET'),
120
            ('GEORGES', 'JORJES'),
121
            ('GEOFFROID', 'JOFROID'),
122
            ('PAGEAU', 'PAJO'),
123
            ('GAGNION', 'GAGNON'),
124
            ('MIGNIER', 'MIGNER'),
125
            ('HALLEY', 'ALE'),
126
            ('GAUTHIER', 'GOTIER'),
127
            ('CHARTIER', 'CHARTIER'),
128
            ('JEANNE', 'JANE'),
129
            ('MACGREGOR', 'MACGREGOR'),
130
            ('MACKAY', 'MACKE'),
131
            ('MCNICOL', 'MACNICOL'),
132
            ('MCNEIL', 'MACNEIL'),
133
            ('PHANEUF', 'FANEUF'),
134
            ('PHILIPPE', 'FILIPE'),
135
            ('QUENNEVILLE', 'KENEVILLE'),
136
            ('LAROCQUE', 'LAROKE'),
137
            ('SCIPION', 'SIPION'),
138
            ('ASCELIN', 'ASSELIN'),
139
            ('VASCO', 'VASKO'),
140
            ('PASCALINE', 'PASKALINE'),
141
            ('ESHEMBACK', 'ECHENBAK'),
142
            ('ASHED', 'ACHED'),
143
            ('GRATIA', 'GRASSIA'),
144
            ('PATRITIA', 'PATRISSIA'),
145
            ('BERTIO', 'BERTIO'),
146
            ('MATIEU', 'MATIEU'),
147
            ('BERTIAUME', 'BERTIOME'),
148
            ('MUNROW', 'MUNRO'),
149
            ('BRANISLAW', 'BRANISLA'),
150
            ('LOWMEN', 'LOMEN'),
151
            ('ANDREW', 'ENDREW'),
152
            ('EXCEL', 'EXEL'),
153
            ('EXCERINE', 'EXERINE'),
154
            ('EXSILDA', 'EXILDA'),
155
            ('EXZELDA', 'EXELDA'),
156
            ('CAZEAU', 'KASO'),
157
            ('BRAZEAU', 'BRASO'),
158
            ('FITZPATRICK', 'FITSPATRIK'),
159
            ('SINGELAIS', 'ST-JELAIS'),
160
            ('CINQMARS', 'ST-MARS'),
161
            ('SAINT-AMAND', 'ST-AMEN'),
162
            ('SAINTECROIX', 'STE-KROIX'),
163
            ('ST-HILAIRE', 'ST-ILAIRE'),
164
            ('STE-CROIX', 'STE-KROIX'),
165
            ('LAVALLEE', 'LAVALE'),
166
            ('CORINNE', 'KORINE'),
167
            ('DUTILE', 'DUTILLE')
168
        )
169
        for name, encoding in test_cases:
170
            self.assertEqual(fonem(name), encoding)
171
172
173
class HenryCodeTestCases(unittest.TestCase):
174
    """Test Henry Code functions.
175
176
    test cases for abydos.phonetic.henry_early
177
    """
178
179
    def test_henry_early(self):
180
        """Test abydos.phonetic.henry_early."""
181
        # Base case
182
        self.assertEqual(henry_early(''), '')
183
184
        # Examples from Legare 1972 paper
185
        self.assertEqual(henry_early('Descarry'), 'DKR')
186
        self.assertEqual(henry_early('Descaries'), 'DKR')
187
        self.assertEqual(henry_early('Campo'), 'KP')
188
        self.assertEqual(henry_early('Campot'), 'KP')
189
        self.assertEqual(henry_early('Gausselin'), 'GSL')
190
        self.assertEqual(henry_early('Gosselin'), 'GSL')
191
        self.assertEqual(henry_early('Bergeron'), 'BRJ')
192
        self.assertEqual(henry_early('Bergereau'), 'BRJ')
193
        self.assertEqual(henry_early('Bosseron'), 'BSR')
194
        self.assertEqual(henry_early('Cicire'), 'SSR')
195
        self.assertEqual(henry_early('Lechevalier'), 'LCV')
196
        self.assertEqual(henry_early('Chevalier'), 'CVL')
197
        self.assertEqual(henry_early('Peloy'), 'PL')
198
        self.assertEqual(henry_early('Beloy'), 'BL')
199
        self.assertEqual(henry_early('Beret'), 'BR')
200
        self.assertEqual(henry_early('Benet'), 'BN')
201
        self.assertEqual(henry_early('Turcot'), 'TRK')
202
        self.assertEqual(henry_early('Turgot'), 'TRG')
203
        self.assertEqual(henry_early('Vigier'), 'VJ')
204
        self.assertEqual(henry_early('Vigiere'), 'VJR')
205
        self.assertEqual(henry_early('Dodin'), 'DD')
206
        self.assertEqual(henry_early('Dodelin'), 'DDL')
207
208
        # Tests to complete coverage
209
        self.assertEqual(henry_early('Anil'), 'ANL')
210
        self.assertEqual(henry_early('Emmanuel'), 'AMN')
211
        self.assertEqual(henry_early('Ainu'), 'EN')
212
        self.assertEqual(henry_early('Oeuf'), 'OF')
213
        self.assertEqual(henry_early('Yves'), 'IV')
214
        self.assertEqual(henry_early('Yo'), 'I')
215
        self.assertEqual(henry_early('Umman'), 'EM')
216
        self.assertEqual(henry_early('Omman'), 'OM')
217
        self.assertEqual(henry_early('Zoe'), 'S')
218
        self.assertEqual(henry_early('Beauchamp'), 'BCP')
219
        self.assertEqual(henry_early('Chloe'), 'KL')
220
        self.assertEqual(henry_early('Gerard'), 'JRR')
221
        self.assertEqual(henry_early('Agnes'), 'ANN')
222
        self.assertEqual(henry_early('Pinot'), 'PN')
223
        self.assertEqual(henry_early('Philo'), 'FL')
224
        self.assertEqual(henry_early('Quisling'), 'GL')
225
        self.assertEqual(henry_early('Qualite'), 'KLT')
226
        self.assertEqual(henry_early('Sainte-Marie'), 'XMR')
227
        self.assertEqual(henry_early('Saint-Jean'), 'XJ')
228
        self.assertEqual(henry_early('Ste-Marie'), 'XMR')
229
        self.assertEqual(henry_early('St-Jean'), 'XJ')
230
        self.assertEqual(henry_early('Cloe'), 'KL')
231
        self.assertEqual(henry_early('Ahch-To'), 'AKT')
232
        self.assertEqual(henry_early('Zdavros'), 'SDV')
233
        self.assertEqual(henry_early('Sdavros'), 'DVR')
234
        self.assertEqual(henry_early('Coulomb'), 'KLB')
235
        self.assertEqual(henry_early('Calm'), 'K')
236
        self.assertEqual(henry_early('Omnia'), 'ON')
237
        self.assertEqual(henry_early('Ramps'), 'RPS')
238
        self.assertEqual(henry_early('Renault'), 'RN')
239
        self.assertEqual(henry_early('Czech'), 'CSK')
240
        self.assertEqual(henry_early('Imran'), 'ER')
241
        self.assertEqual(henry_early('Christopher', max_length=-1), 'KRXF')
242
243
244
# Run the test suite only when this file is executed as a script,
# not when it is imported by a test runner.
if __name__ == '__main__':
    unittest.main()
246