|
1
|
|
|
# -*- coding: utf-8 -*- |
|
2
|
|
|
|
|
3
|
|
|
# Copyright 2018 by Christopher C. Little. |
|
4
|
|
|
# This file is part of Abydos. |
|
5
|
|
|
# |
|
6
|
|
|
# Abydos is free software: you can redistribute it and/or modify |
|
7
|
|
|
# it under the terms of the GNU General Public License as published by |
|
8
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
|
9
|
|
|
# (at your option) any later version. |
|
10
|
|
|
# |
|
11
|
|
|
# Abydos is distributed in the hope that it will be useful, |
|
12
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14
|
|
|
# GNU General Public License for more details. |
|
15
|
|
|
# |
|
16
|
|
|
# You should have received a copy of the GNU General Public License |
|
17
|
|
|
# along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
|
18
|
|
|
|
|
19
|
|
|
"""abydos.tests.test_phonetic_fr. |
|
20
|
|
|
|
|
21
|
|
|
This module contains unit tests for abydos.phonetic.fr |
|
22
|
|
|
""" |
|
23
|
|
|
|
|
24
|
|
|
from __future__ import unicode_literals |
|
25
|
|
|
|
|
26
|
|
|
import unittest |
|
27
|
|
|
|
|
28
|
|
|
from abydos.phonetic.fr import fonem, henry_early |
|
29
|
|
|
|
|
30
|
|
|
|
|
31
|
|
|
class FonemTestCases(unittest.TestCase): |
|
32
|
|
|
"""Test FONEM functions. |
|
33
|
|
|
|
|
34
|
|
|
test cases for abydos.phonetic.fonem |
|
35
|
|
|
""" |
|
36
|
|
|
|
|
37
|
|
|
def test_fonem(self): |
|
38
|
|
|
"""Test abydos.phonetic.fonem.""" |
|
39
|
|
|
# Base cases |
|
40
|
|
|
self.assertEqual(fonem(''), '') |
|
41
|
|
|
|
|
42
|
|
|
# Test cases, mostly from the FONEM specification, |
|
43
|
|
|
# but copied from Talisman: |
|
44
|
|
|
# https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/french/fonem.js |
|
45
|
|
|
test_cases = ( |
|
46
|
|
|
('BEAULAC', 'BOLAK'), |
|
47
|
|
|
('BAULAC', 'BOLAK'), |
|
48
|
|
|
('IMBEAULT', 'INBO'), |
|
49
|
|
|
('DUFAUT', 'DUFO'), |
|
50
|
|
|
('THIBOUTOT', 'TIBOUTOT'), |
|
51
|
|
|
('DEVAUX', 'DEVO'), |
|
52
|
|
|
('RONDEAUX', 'RONDO'), |
|
53
|
|
|
('BOURGAULX', 'BOURGO'), |
|
54
|
|
|
('PINCHAUD', 'PINCHO'), |
|
55
|
|
|
('PEDNAULD', 'PEDNO'), |
|
56
|
|
|
('MAZENOD', 'MASENOD'), |
|
57
|
|
|
('ARNOLD', 'ARNOL'), |
|
58
|
|
|
('BERTOLD', 'BERTOL'), |
|
59
|
|
|
('BELLAY', 'BELE'), |
|
60
|
|
|
('SANDAY', 'SENDE'), |
|
61
|
|
|
('GAY', 'GAI'), |
|
62
|
|
|
('FAYARD', 'FAYAR'), |
|
63
|
|
|
('LEMIEUX', 'LEMIEU'), |
|
64
|
|
|
('LHEUREUX', 'LEUREU'), |
|
65
|
|
|
('BELLEY', 'BELE'), |
|
66
|
|
|
('WELLEY', 'WELE'), |
|
67
|
|
|
('MEYER', 'MEYER'), |
|
68
|
|
|
('BOILY', 'BOILI'), |
|
69
|
|
|
('LOYSEAU', 'LOISO'), |
|
70
|
|
|
('MAYRAND', 'MAIREN'), |
|
71
|
|
|
('GUYON', 'GUYON'), |
|
72
|
|
|
('FAILLARD', 'FAYAR'), |
|
73
|
|
|
('FAIARD', 'FAYAR'), |
|
74
|
|
|
('MEIER', 'MEYER'), |
|
75
|
|
|
('MEILLER', 'MEYER'), |
|
76
|
|
|
('GUILLON', 'GUYON'), |
|
77
|
|
|
('LAVILLE', 'LAVILLE'), |
|
78
|
|
|
('COUET', 'CWET'), |
|
79
|
|
|
('EDOUARD', 'EDWAR'), |
|
80
|
|
|
('GIROUARD', 'JIRWAR'), |
|
81
|
|
|
('OZOUADE', 'OSWADE'), # differs from test set |
|
82
|
|
|
('BOUILLE', 'BOUYE'), |
|
83
|
|
|
('POUYEZ', 'POUYES'), # differs from test set |
|
84
|
|
|
('LEMEE', 'LEME'), |
|
85
|
|
|
('ABRAAM', 'ABRAM'), |
|
86
|
|
|
('ARCHEMBAULT', 'ARCHENBO'), |
|
87
|
|
|
('AMTHIME', 'ENTIME'), |
|
88
|
|
|
('ROMPRE', 'RONPRE'), |
|
89
|
|
|
('BOMSECOURS', 'BONSECOURS'), |
|
90
|
|
|
('BOULANGER', 'BOULENJER'), |
|
91
|
|
|
('TANCREDE', 'TENKREDE'), |
|
92
|
|
|
('BLAIN', 'BLIN'), |
|
93
|
|
|
('BLAINVILLE', 'BLINVILLE'), |
|
94
|
|
|
('MAINARD', 'MAINAR'), |
|
95
|
|
|
('RAIMOND', 'RAIMON'), |
|
96
|
|
|
('BLACKBORN', 'BLAKBURN'), |
|
97
|
|
|
('SEABOURNE', 'SEABURN'), |
|
98
|
|
|
('IMBO', 'INBO'), |
|
99
|
|
|
('RIMFRET', 'RINFRET'), |
|
100
|
|
|
('LEFEBVRE', 'LEFEVRE'), |
|
101
|
|
|
('MACE', 'MASSE'), |
|
102
|
|
|
('MACON', 'MACON'), |
|
103
|
|
|
('MARCELIN', 'MARSELIN'), |
|
104
|
|
|
('MARCEAU', 'MARSO'), |
|
105
|
|
|
('VINCELETTE', 'VINSELETE'), |
|
106
|
|
|
('FORCADE', 'FORCADE'), |
|
107
|
|
|
('CELINE', 'SELINE'), |
|
108
|
|
|
('CERAPHIN', 'SERAFIN'), |
|
109
|
|
|
('CAMILLE', 'KAMILLE'), |
|
110
|
|
|
('CAYETTE', 'KAYETE'), |
|
111
|
|
|
('CARINE', 'KARINE'), |
|
112
|
|
|
('LUC', 'LUK'), |
|
113
|
|
|
('LEBLANC', 'LEBLEN'), |
|
114
|
|
|
('VICTOR', 'VIKTOR'), |
|
115
|
|
|
('LACCOULINE', 'LAKOULINE'), |
|
116
|
|
|
('MACCIMILIEN', 'MAXIMILIEN'), |
|
117
|
|
|
('MAGELLA', 'MAJELA'), |
|
118
|
|
|
('GINETTE', 'JINETE'), |
|
119
|
|
|
('GANDET', 'GANDET'), |
|
120
|
|
|
('GEORGES', 'JORJES'), |
|
121
|
|
|
('GEOFFROID', 'JOFROID'), |
|
122
|
|
|
('PAGEAU', 'PAJO'), |
|
123
|
|
|
('GAGNION', 'GAGNON'), |
|
124
|
|
|
('MIGNIER', 'MIGNER'), |
|
125
|
|
|
('HALLEY', 'ALE'), |
|
126
|
|
|
('GAUTHIER', 'GOTIER'), |
|
127
|
|
|
('CHARTIER', 'CHARTIER'), |
|
128
|
|
|
('JEANNE', 'JANE'), |
|
129
|
|
|
('MACGREGOR', 'MACGREGOR'), |
|
130
|
|
|
('MACKAY', 'MACKE'), |
|
131
|
|
|
('MCNICOL', 'MACNICOL'), |
|
132
|
|
|
('MCNEIL', 'MACNEIL'), |
|
133
|
|
|
('PHANEUF', 'FANEUF'), |
|
134
|
|
|
('PHILIPPE', 'FILIPE'), |
|
135
|
|
|
('QUENNEVILLE', 'KENEVILLE'), |
|
136
|
|
|
('LAROCQUE', 'LAROKE'), |
|
137
|
|
|
('SCIPION', 'SIPION'), |
|
138
|
|
|
('ASCELIN', 'ASSELIN'), |
|
139
|
|
|
('VASCO', 'VASKO'), |
|
140
|
|
|
('PASCALINE', 'PASKALINE'), |
|
141
|
|
|
('ESHEMBACK', 'ECHENBAK'), |
|
142
|
|
|
('ASHED', 'ACHED'), |
|
143
|
|
|
('GRATIA', 'GRASSIA'), |
|
144
|
|
|
('PATRITIA', 'PATRISSIA'), |
|
145
|
|
|
('BERTIO', 'BERTIO'), |
|
146
|
|
|
('MATIEU', 'MATIEU'), |
|
147
|
|
|
('BERTIAUME', 'BERTIOME'), |
|
148
|
|
|
('MUNROW', 'MUNRO'), |
|
149
|
|
|
('BRANISLAW', 'BRANISLA'), |
|
150
|
|
|
('LOWMEN', 'LOMEN'), |
|
151
|
|
|
('ANDREW', 'ENDREW'), |
|
152
|
|
|
('EXCEL', 'EXEL'), |
|
153
|
|
|
('EXCERINE', 'EXERINE'), |
|
154
|
|
|
('EXSILDA', 'EXILDA'), |
|
155
|
|
|
('EXZELDA', 'EXELDA'), |
|
156
|
|
|
('CAZEAU', 'KASO'), |
|
157
|
|
|
('BRAZEAU', 'BRASO'), |
|
158
|
|
|
('FITZPATRICK', 'FITSPATRIK'), |
|
159
|
|
|
('SINGELAIS', 'ST-JELAIS'), |
|
160
|
|
|
('CINQMARS', 'ST-MARS'), |
|
161
|
|
|
('SAINT-AMAND', 'ST-AMEN'), |
|
162
|
|
|
('SAINTECROIX', 'STE-KROIX'), |
|
163
|
|
|
('ST-HILAIRE', 'ST-ILAIRE'), |
|
164
|
|
|
('STE-CROIX', 'STE-KROIX'), |
|
165
|
|
|
('LAVALLEE', 'LAVALE'), |
|
166
|
|
|
('CORINNE', 'KORINE'), |
|
167
|
|
|
('DUTILE', 'DUTILLE') |
|
168
|
|
|
) |
|
169
|
|
|
for name, encoding in test_cases: |
|
170
|
|
|
self.assertEqual(fonem(name), encoding) |
|
171
|
|
|
|
|
172
|
|
|
|
|
173
|
|
|
class HenryCodeTestCases(unittest.TestCase):
    """Test Henry Code functions.

    test cases for henry_early (imported from abydos.phonetic.fr)
    """

    def test_henry_early(self):
        """Test henry_early against tables of (name, expected code) pairs."""
        # Base case
        self.assertEqual(henry_early(''), '')

        # Examples from Legare 1972 paper
        paper_examples = (
            ('Descarry', 'DKR'),
            ('Descaries', 'DKR'),
            ('Campo', 'KP'),
            ('Campot', 'KP'),
            ('Gausselin', 'GSL'),
            ('Gosselin', 'GSL'),
            ('Bergeron', 'BRJ'),
            ('Bergereau', 'BRJ'),
            ('Bosseron', 'BSR'),
            ('Cicire', 'SSR'),
            ('Lechevalier', 'LCV'),
            ('Chevalier', 'CVL'),
            ('Peloy', 'PL'),
            ('Beloy', 'BL'),
            ('Beret', 'BR'),
            ('Benet', 'BN'),
            ('Turcot', 'TRK'),
            ('Turgot', 'TRG'),
            ('Vigier', 'VJ'),
            ('Vigiere', 'VJR'),
            ('Dodin', 'DD'),
            ('Dodelin', 'DDL'),
        )
        for word, code in paper_examples:
            self.assertEqual(henry_early(word), code)

        # Tests to complete coverage
        coverage_examples = (
            ('Anil', 'ANL'),
            ('Emmanuel', 'AMN'),
            ('Ainu', 'EN'),
            ('Oeuf', 'OF'),
            ('Yves', 'IV'),
            ('Yo', 'I'),
            ('Umman', 'EM'),
            ('Omman', 'OM'),
            ('Zoe', 'S'),
            ('Beauchamp', 'BCP'),
            ('Chloe', 'KL'),
            ('Gerard', 'JRR'),
            ('Agnes', 'ANN'),
            ('Pinot', 'PN'),
            ('Philo', 'FL'),
            ('Quisling', 'GL'),
            ('Qualite', 'KLT'),
            ('Sainte-Marie', 'XMR'),
            ('Saint-Jean', 'XJ'),
            ('Ste-Marie', 'XMR'),
            ('St-Jean', 'XJ'),
            ('Cloe', 'KL'),
            ('Ahch-To', 'AKT'),
            ('Zdavros', 'SDV'),
            ('Sdavros', 'DVR'),
            ('Coulomb', 'KLB'),
            ('Calm', 'K'),
            ('Omnia', 'ON'),
            ('Ramps', 'RPS'),
            ('Renault', 'RN'),
            ('Czech', 'CSK'),
            ('Imran', 'ER'),
        )
        for word, code in coverage_examples:
            self.assertEqual(henry_early(word), code)

        # Kept outside the tables: the only call that passes max_length,
        # and the only expected code here longer than three characters.
        self.assertEqual(henry_early('Christopher', max_length=-1), 'KRXF')
|
242
|
|
|
|
|
243
|
|
|
|
|
244
|
|
|
if __name__ == '__main__':
    # Run as a script: hand control to unittest's command-line runner.
    unittest.main()
|
246
|
|
|
|