1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
|
3
|
|
|
# Copyright 2018 by Christopher C. Little. |
4
|
|
|
# This file is part of Abydos. |
5
|
|
|
# |
6
|
|
|
# Abydos is free software: you can redistribute it and/or modify |
7
|
|
|
# it under the terms of the GNU General Public License as published by |
8
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
# (at your option) any later version. |
10
|
|
|
# |
11
|
|
|
# Abydos is distributed in the hope that it will be useful, |
12
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14
|
|
|
# GNU General Public License for more details. |
15
|
|
|
# |
16
|
|
|
# You should have received a copy of the GNU General Public License |
17
|
|
|
# along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
18
|
|
|
|
19
|
|
|
"""abydos.tests.test_phonetic_fr. |
20
|
|
|
|
21
|
|
|
This module contains unit tests for abydos.phonetic.fr |
22
|
|
|
""" |
23
|
|
|
|
24
|
|
|
from __future__ import unicode_literals |
25
|
|
|
|
26
|
|
|
import unittest |
27
|
|
|
|
28
|
|
|
from abydos.phonetic.fr import fonem, henry_early |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
class FonemTestCases(unittest.TestCase):
    """Exercise the FONEM encoder.

    Unit tests for the fonem function imported from abydos.phonetic.fr
    """

    def test_fonem(self):
        """Check fonem() against a table of names and expected codes."""
        # Each entry pairs an input name with the encoding fonem() must
        # return for it. Apart from the leading empty-string base case, the
        # entries come mostly from the FONEM specification, as copied from
        # Talisman:
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/french/fonem.js
        expectations = (
            # Base case
            ('', ''),
            # Specification / Talisman cases
            ('BEAULAC', 'BOLAK'),
            ('BAULAC', 'BOLAK'),
            ('IMBEAULT', 'INBO'),
            ('DUFAUT', 'DUFO'),
            ('THIBOUTOT', 'TIBOUTOT'),
            ('DEVAUX', 'DEVO'),
            ('RONDEAUX', 'RONDO'),
            ('BOURGAULX', 'BOURGO'),
            ('PINCHAUD', 'PINCHO'),
            ('PEDNAULD', 'PEDNO'),
            ('MAZENOD', 'MASENOD'),
            ('ARNOLD', 'ARNOL'),
            ('BERTOLD', 'BERTOL'),
            ('BELLAY', 'BELE'),
            ('SANDAY', 'SENDE'),
            ('GAY', 'GAI'),
            ('FAYARD', 'FAYAR'),
            ('LEMIEUX', 'LEMIEU'),
            ('LHEUREUX', 'LEUREU'),
            ('BELLEY', 'BELE'),
            ('WELLEY', 'WELE'),
            ('MEYER', 'MEYER'),
            ('BOILY', 'BOILI'),
            ('LOYSEAU', 'LOISO'),
            ('MAYRAND', 'MAIREN'),
            ('GUYON', 'GUYON'),
            ('FAILLARD', 'FAYAR'),
            ('FAIARD', 'FAYAR'),
            ('MEIER', 'MEYER'),
            ('MEILLER', 'MEYER'),
            ('GUILLON', 'GUYON'),
            ('LAVILLE', 'LAVILLE'),
            ('COUET', 'CWET'),
            ('EDOUARD', 'EDWAR'),
            ('GIROUARD', 'JIRWAR'),
            ('OZOUADE', 'OSWADE'),  # differs from test set
            ('BOUILLE', 'BOUYE'),
            ('POUYEZ', 'POUYES'),  # differs from test set
            ('LEMEE', 'LEME'),
            ('ABRAAM', 'ABRAM'),
            ('ARCHEMBAULT', 'ARCHENBO'),
            ('AMTHIME', 'ENTIME'),
            ('ROMPRE', 'RONPRE'),
            ('BOMSECOURS', 'BONSECOURS'),
            ('BOULANGER', 'BOULENJER'),
            ('TANCREDE', 'TENKREDE'),
            ('BLAIN', 'BLIN'),
            ('BLAINVILLE', 'BLINVILLE'),
            ('MAINARD', 'MAINAR'),
            ('RAIMOND', 'RAIMON'),
            ('BLACKBORN', 'BLAKBURN'),
            ('SEABOURNE', 'SEABURN'),
            ('IMBO', 'INBO'),
            ('RIMFRET', 'RINFRET'),
            ('LEFEBVRE', 'LEFEVRE'),
            ('MACE', 'MASSE'),
            ('MACON', 'MACON'),
            ('MARCELIN', 'MARSELIN'),
            ('MARCEAU', 'MARSO'),
            ('VINCELETTE', 'VINSELETE'),
            ('FORCADE', 'FORCADE'),
            ('CELINE', 'SELINE'),
            ('CERAPHIN', 'SERAFIN'),
            ('CAMILLE', 'KAMILLE'),
            ('CAYETTE', 'KAYETE'),
            ('CARINE', 'KARINE'),
            ('LUC', 'LUK'),
            ('LEBLANC', 'LEBLEN'),
            ('VICTOR', 'VIKTOR'),
            ('LACCOULINE', 'LAKOULINE'),
            ('MACCIMILIEN', 'MAXIMILIEN'),
            ('MAGELLA', 'MAJELA'),
            ('GINETTE', 'JINETE'),
            ('GANDET', 'GANDET'),
            ('GEORGES', 'JORJES'),
            ('GEOFFROID', 'JOFROID'),
            ('PAGEAU', 'PAJO'),
            ('GAGNION', 'GAGNON'),
            ('MIGNIER', 'MIGNER'),
            ('HALLEY', 'ALE'),
            ('GAUTHIER', 'GOTIER'),
            ('CHARTIER', 'CHARTIER'),
            ('JEANNE', 'JANE'),
            ('MACGREGOR', 'MACGREGOR'),
            ('MACKAY', 'MACKE'),
            ('MCNICOL', 'MACNICOL'),
            ('MCNEIL', 'MACNEIL'),
            ('PHANEUF', 'FANEUF'),
            ('PHILIPPE', 'FILIPE'),
            ('QUENNEVILLE', 'KENEVILLE'),
            ('LAROCQUE', 'LAROKE'),
            ('SCIPION', 'SIPION'),
            ('ASCELIN', 'ASSELIN'),
            ('VASCO', 'VASKO'),
            ('PASCALINE', 'PASKALINE'),
            ('ESHEMBACK', 'ECHENBAK'),
            ('ASHED', 'ACHED'),
            ('GRATIA', 'GRASSIA'),
            ('PATRITIA', 'PATRISSIA'),
            ('BERTIO', 'BERTIO'),
            ('MATIEU', 'MATIEU'),
            ('BERTIAUME', 'BERTIOME'),
            ('MUNROW', 'MUNRO'),
            ('BRANISLAW', 'BRANISLA'),
            ('LOWMEN', 'LOMEN'),
            ('ANDREW', 'ENDREW'),
            ('EXCEL', 'EXEL'),
            ('EXCERINE', 'EXERINE'),
            ('EXSILDA', 'EXILDA'),
            ('EXZELDA', 'EXELDA'),
            ('CAZEAU', 'KASO'),
            ('BRAZEAU', 'BRASO'),
            ('FITZPATRICK', 'FITSPATRIK'),
            ('SINGELAIS', 'ST-JELAIS'),
            ('CINQMARS', 'ST-MARS'),
            ('SAINT-AMAND', 'ST-AMEN'),
            ('SAINTECROIX', 'STE-KROIX'),
            ('ST-HILAIRE', 'ST-ILAIRE'),
            ('STE-CROIX', 'STE-KROIX'),
            ('LAVALLEE', 'LAVALE'),
            ('CORINNE', 'KORINE'),
            ('DUTILE', 'DUTILLE'),
        )

        # Entries are checked in order; the first mismatch fails the test.
        for surname, expected_code in expectations:
            self.assertEqual(fonem(surname), expected_code)
171
|
|
|
|
172
|
|
|
|
173
|
|
|
class HenryCodeTestCases(unittest.TestCase):
    """Exercise the early Henry code encoder.

    Unit tests for the henry_early function imported from abydos.phonetic.fr
    """

    def test_henry_early(self):
        """Check henry_early() against tables of names and expected codes."""
        # Base case: the empty string encodes to the empty string.
        self.assertEqual(henry_early(''), '')

        # Examples from Legare 1972 paper
        legare_examples = (
            ('Descarry', 'DKR'),
            ('Descaries', 'DKR'),
            ('Campo', 'KP'),
            ('Campot', 'KP'),
            ('Gausselin', 'GSL'),
            ('Gosselin', 'GSL'),
            ('Bergeron', 'BRJ'),
            ('Bergereau', 'BRJ'),
            ('Bosseron', 'BSR'),
            ('Cicire', 'SSR'),
            ('Lechevalier', 'LCV'),
            ('Chevalier', 'CVL'),
            ('Peloy', 'PL'),
            ('Beloy', 'BL'),
            ('Beret', 'BR'),
            ('Benet', 'BN'),
            ('Turcot', 'TRK'),
            ('Turgot', 'TRG'),
            ('Vigier', 'VJ'),
            ('Vigiere', 'VJR'),
            ('Dodin', 'DD'),
            ('Dodelin', 'DDL'),
        )

        # Tests to complete coverage
        coverage_examples = (
            ('Anil', 'ANL'),
            ('Emmanuel', 'AMN'),
            ('Ainu', 'EN'),
            ('Oeuf', 'OF'),
            ('Yves', 'IV'),
            ('Yo', 'I'),
            ('Umman', 'EM'),
            ('Omman', 'OM'),
            ('Zoe', 'S'),
            ('Beauchamp', 'BCP'),
            ('Chloe', 'KL'),
            ('Gerard', 'JRR'),
            ('Agnes', 'ANN'),
            ('Pinot', 'PN'),
            ('Philo', 'FL'),
            ('Quisling', 'GL'),
            ('Qualite', 'KLT'),
            ('Sainte-Marie', 'XMR'),
            ('Saint-Jean', 'XJ'),
            ('Ste-Marie', 'XMR'),
            ('St-Jean', 'XJ'),
            ('Cloe', 'KL'),
            ('Ahch-To', 'AKT'),
            ('Zdavros', 'SDV'),
            ('Sdavros', 'DVR'),
            ('Coulomb', 'KLB'),
            ('Calm', 'K'),
            ('Omnia', 'ON'),
            ('Ramps', 'RPS'),
            ('Renault', 'RN'),
            ('Czech', 'CSK'),
            ('Imran', 'ER'),
        )

        # Both tables use the default arguments and are checked in order;
        # the first mismatch fails the test.
        for table in (legare_examples, coverage_examples):
            for surname, expected_code in table:
                self.assertEqual(henry_early(surname), expected_code)

        # The only call with an explicit max_length. NOTE(review): -1
        # presumably lifts the default length cap (the expected code has
        # four letters) -- confirm against henry_early's documentation.
        unbounded_code = henry_early('Christopher', max_length=-1)
        self.assertEqual(unbounded_code, 'KRXF')
242
|
|
|
|
243
|
|
|
|
244
|
|
|
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
246
|
|
|
|