# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.tests.test_phonetic_soundex.

This module contains unit tests for abydos.phonetic.soundex
"""
|
23
|
|
|
|
|
24
|
|
|
from __future__ import unicode_literals |
|
25
|
|
|
|
|
26
|
|
|
import unittest |
|
27
|
|
|
|
|
28
|
|
|
from abydos.phonetic.soundex import fuzzy_soundex, lein, phonex, phonix, \ |
|
29
|
|
|
pshp_soundex_first, pshp_soundex_last, refined_soundex, soundex |
|
30
|
|
|
|
|
31
|
|
|
|
|
32
|
|
|
class SoundexTestCases(unittest.TestCase):
    """Test Soundex functions.

    test cases for abydos.phonetic.soundex, .refined_soundex
    """

    def _assert_codes(self, encoder, expected, *args, **kwargs):
        """Assert that encoder maps every word to its expected code.

        ``expected`` is an iterable of ``(word, code)`` pairs. Any further
        positional or keyword arguments are forwarded unchanged to
        ``encoder`` after the word.
        """
        for word, code in expected:
            # Compare (word, result) pairs rather than bare results so that
            # a failure message names the input that produced the mismatch.
            self.assertEqual((word, encoder(word, *args, **kwargs)),
                             (word, code))

    def test_soundex(self):
        """Test abydos.phonetic.soundex."""
        self.assertEqual(soundex(''), '0000')

        # https://archive.org/stream/accessingindivid00moor#page/14/mode/2up
        self._assert_codes(soundex, (
            ('Euler', 'E460'),
            ('Gauss', 'G200'),
            ('Hilbert', 'H416'),
            ('Knuth', 'K530'),
            ('Lloyd', 'L300'),
            ('Lukasieicz', 'L222'),
            ('Ellery', 'E460'),
            ('Ghosh', 'G200'),
            ('Heilbronn', 'H416'),
            ('Kant', 'K530'),
            ('Ladd', 'L300'),
            ('Lissajous', 'L222'),
            ('Rogers', 'R262'),
            ('Rodgers', 'R326'),
        ))
        # assertNotEqual, not the assertNotEquals alias: the alias was
        # deprecated for years and removed in Python 3.12.
        for left, right in (
            ('Rogers', 'Rodgers'),
            ('Sinclair', 'St. Clair'),
            ('Tchebysheff', 'Chebyshev'),
        ):
            self.assertNotEqual(soundex(left), soundex(right))

        # http://creativyst.com/Doc/Articles/SoundEx1/SoundEx1.htm#Related
        self._assert_codes(soundex, (
            ('Htacky', 'H320'),
            ('Atacky', 'A320'),
            ('Schmit', 'S530'),
            ('Schneider', 'S536'),
            ('Pfister', 'P236'),
            ('Ashcroft', 'A261'),
            ('Asicroft', 'A226'),
        ))

        # https://en.wikipedia.org/wiki/Soundex
        self._assert_codes(soundex, (
            ('Robert', 'R163'),
            ('Rupert', 'R163'),
            ('Rubin', 'R150'),
            ('Tymczak', 'T522'),
        ))

        # https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex
        self._assert_codes(soundex, (
            ('Peters', 'P362'),
            ('Peterson', 'P362'),
            ('Moskowitz', 'M232'),
            ('Moskovitz', 'M213'),
            ('Auerbach', 'A612'),
            ('Uhrbach', 'U612'),
            ('Jackson', 'J250'),
            ('Jackson-Jackson', 'J252'),
        ))

        # max_length tests (max_length passed positionally)
        self.assertEqual(soundex('Lincoln', 10), 'L524500000')
        self.assertEqual(soundex('Lincoln', 5), 'L5245')
        self.assertEqual(soundex('Christopher', 6), 'C62316')

        # max_length bounds tests
        self.assertEqual(soundex('Niall', max_length=-1),
                         'N4000000000000000000000000000000000000000000000000' +
                         '00000000000000')
        self.assertEqual(soundex('Niall', max_length=0), 'N400')

        # reverse tests
        self._assert_codes(soundex, (
            ('Rubin', 'N160'),
            ('Llyod', 'D400'),
            ('Lincoln', 'N425'),
            ('Knuth', 'H352'),
        ), reverse=True)

        # zero_pad tests
        self.assertEqual(soundex('Niall', max_length=-1, zero_pad=False), 'N4')
        self.assertEqual(soundex('Niall', max_length=0, zero_pad=False), 'N4')
        self.assertEqual(soundex('Niall', max_length=0, zero_pad=True), 'N400')
        self.assertEqual(soundex('', max_length=4, zero_pad=False), '0')
        self.assertEqual(soundex('', max_length=4, zero_pad=True), '0000')

    def test_soundex_special(self):
        """Test abydos.phonetic.soundex (special 1880-1910 variant method)."""
        self._assert_codes(soundex, (
            ('Ashcroft', 'A226'),
            ('Asicroft', 'A226'),
            ('AsWcroft', 'A226'),
            ('Rupert', 'R163'),
            ('Rubin', 'R150'),
        ), var='special')

    def test_soundex_census(self):
        """Test abydos.phonetic.soundex (Census variant method)."""
        # The expected value is a pair of codes for the prefixed names
        # below, and a single code for 'McDonald'.
        self._assert_codes(soundex, (
            ('Vandeusen', ('V532', 'D250')),
            ('van Deusen', ('V532', 'D250')),
            ('McDonald', 'M235'),
            ('la Cruz', ('L262', 'C620')),
            ('vanDamme', ('V535', 'D500')),
        ), var='Census')

    def test_refined_soundex(self):
        """Test abydos.phonetic.refined_soundex."""
        # Each entry is (names, default code, code with retain_vowels=True);
        # every name in a group is expected to yield the same code.
        # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
        ntz_groups = (
            (('Braz', 'Broz'), 'B195', 'B1905'),
            (('Caren', 'Caron', 'Carren', 'Charon', 'Corain', 'Coram',
              'Corran', 'Corrin', 'Corwin', 'Curran', 'Curreen', 'Currin',
              'Currom', 'Currum', 'Curwen'), 'C398', 'C30908'),
            (('Hairs', 'Hark', 'Hars', 'Hayers', 'Heers', 'Hiers'),
             'H93', 'H093'),
            (('Lambard', 'Lambart', 'Lambert', 'Lambird', 'Lampaert',
              'Lampard', 'Lampart', 'Lamperd', 'Lampert', 'Lamport',
              'Limbert', 'Lombard'), 'L78196', 'L7081096'),
            (('Nolton', 'Noulton'), 'N8768', 'N807608'),
        )
        # http://trimc-nlp.blogspot.com/2015/03/the-soundex-algorithm.html
        trimc_groups = (
            (('Craig', 'Crag', 'Crejg', 'Creig', 'Craigg', 'Craug',
              'Craiggg', 'Creg', 'Cregg', 'Creag'), 'C394', 'C3904'),
            (('Greg', 'Gregg', 'Graig', 'Greig', 'Greggg', 'Groeg', 'Graj',
              'Grej', 'Grreg', 'Greag', 'Grig'), 'G494', 'G4904'),
            (('Kregg', 'Kraig', 'Krag', 'Kreig', 'Krug', 'Kreg', 'Krieg',
              'Krijg'), 'K394', 'K3904'),
        )
        # Apache Commons test cases
        # http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/RefinedSoundexTest.java?view=markup
        # Columns: word, default code, retain_vowels=True code,
        # code with max_length=4 and zero_pad=True.
        apache = (
            ('testing', 'T63684', 'T6036084', 'T636'),
            ('TESTING', 'T63684', 'T6036084', 'T636'),
            ('The', 'T6', 'T60', 'T600'),
            ('quick', 'Q53', 'Q503', 'Q530'),
            ('brown', 'B198', 'B1908', 'B198'),
            ('fox', 'F25', 'F205', 'F250'),
            ('jumped', 'J4816', 'J408106', 'J481'),
            ('over', 'O29', 'O0209', 'O290'),
            ('the', 'T6', 'T60', 'T600'),
            ('lazy', 'L75', 'L7050', 'L750'),
            ('dogs', 'D643', 'D6043', 'D643'),
        )

        # Default arguments
        for names, plain, _ in ntz_groups + trimc_groups:
            self._assert_codes(refined_soundex,
                               [(name, plain) for name in names])
        self._assert_codes(refined_soundex,
                           [(row[0], row[1]) for row in apache])

        # Test with retain_vowels=True
        for names, _, voweled in ntz_groups + trimc_groups:
            self._assert_codes(refined_soundex,
                               [(name, voweled) for name in names],
                               retain_vowels=True)
        self._assert_codes(refined_soundex,
                           [(row[0], row[2]) for row in apache],
                           retain_vowels=True)

        # length tests
        self._assert_codes(refined_soundex,
                           [(row[0], row[3]) for row in apache],
                           max_length=4, zero_pad=True)
        # Without zero_pad, codes shorter than max_length stay short.
        self._assert_codes(refined_soundex, (
            ('The', 'T6'),
            ('quick', 'Q53'),
            ('brown', 'B198'),
            ('fox', 'F25'),
            ('jumped', 'J481'),
            ('over', 'O29'),
            ('the', 'T6'),
            ('lazy', 'L75'),
            ('dogs', 'D643'),
        ), max_length=4)
|
394
|
|
|
|
|
395
|
|
|
|
|
396
|
|
|
class FuzzySoundexTestCases(unittest.TestCase):
    """Test Fuzzy Soundex functions.

    test cases for abydos.phonetic.fuzzy_soundex
    """

    def _assert_codes(self, expected, *args, **kwargs):
        """Assert fuzzy_soundex maps every word to its expected code.

        ``expected`` is an iterable of ``(word, code)`` pairs; remaining
        arguments are forwarded to fuzzy_soundex after the word.
        """
        for word, code in expected:
            # Pairing the word with the result makes a failure message
            # identify which input went wrong.
            self.assertEqual((word, fuzzy_soundex(word, *args, **kwargs)),
                             (word, code))

    def test_fuzzy_soundex(self):
        """Test abydos.phonetic.fuzzy_soundex."""
        self.assertEqual(fuzzy_soundex(''), '00000')
        # http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf
        self._assert_codes((
            ('Kristen', 'K6935'),
            ('Krissy', 'K6900'),
            ('Christen', 'K6935'),
        ))

        # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false
        # (second positional argument 4 requests four-character codes)
        self._assert_codes((
            ('peter', 'P360'),
            ('pete', 'P300'),
            ('pedro', 'P360'),
            ('stephen', 'S315'),
            ('steve', 'S310'),
            ('smith', 'S530'),
            ('smythe', 'S530'),
            ('gail', 'G400'),
            ('gayle', 'G400'),
            ('christine', 'K693'),
            ('christina', 'K693'),
            ('kristina', 'K693'),
        ), 4)

        # etc. (for code coverage)
        self._assert_codes((
            ('Wight', 'W3000'),
            ('Hardt', 'H6000'),
            ('Knight', 'N3000'),
            ('Czech', 'S7000'),
            ('Tsech', 'S7000'),
            ('gnomic', 'N5900'),
            ('Wright', 'R3000'),
            ('Hrothgar', 'R3760'),
            ('Hwaet', 'W3000'),
            ('Grant', 'G6300'),
            ('Hart', 'H6000'),
            ('Hardt', 'H6000'),
        ))

        # max_length bounds tests
        longest = ('N4000000000000000000000000000000000000000000000000' +
                   '00000000000000')
        self.assertEqual(fuzzy_soundex('Niall', max_length=-1), longest)
        self.assertEqual(fuzzy_soundex('Niall', max_length=0), 'N400')

        # zero_pad tests
        for word, length, pad, code in (
            ('Niall', -1, False, 'N4'),
            ('Niall', 0, False, 'N4'),
            ('Niall', 0, True, 'N400'),
            ('', 4, False, '0'),
            ('', 4, True, '0000'),
        ):
            self.assertEqual(
                fuzzy_soundex(word, max_length=length, zero_pad=pad), code)
|
454
|
|
|
|
|
455
|
|
|
|
|
456
|
|
|
class PhonexTestCases(unittest.TestCase):
    """Test Phonex functions.

    test cases for abydos.phonetic.phonex
    """

    def _assert_codes(self, expected, **kwargs):
        """Assert phonex maps every word to its expected code.

        ``expected`` is an iterable of ``(word, code)`` pairs; keyword
        arguments are forwarded to phonex.
        """
        for word, code in expected:
            # Pairing the word with the result makes a failure message
            # identify which input went wrong.
            self.assertEqual((word, phonex(word, **kwargs)), (word, code))

    def test_phonex(self):
        """Test abydos.phonetic.phonex."""
        self.assertEqual(phonex(''), '0000')

        # http://homepages.cs.ncl.ac.uk/brian.randell/Genealogy/NameMatching.pdf
        self._assert_codes((
            ('Ewell', 'A400'),
            ('Filp', 'F100'),
            ('Heames', 'A500'),
            ('Kneves', 'N100'),
            ('River', 'R160'),
            ('Corley', 'C400'),
            ('Carton', 'C350'),
            ('Cachpole', 'C214'),
        ))

        # Name pairs that are expected to share a Phonex code.
        for left, right in (
            ('Ewell', 'Ule'),
            ('Filp', 'Philp'),
            ('Yule', 'Ewell'),
            ('Heames', 'Eames'),
            ('Kneves', 'Neves'),
            ('River', 'Rivers'),
            ('Corley', 'Coley'),
            ('Carton', 'Carlton'),
            ('Cachpole', 'Catchpole'),
        ):
            self.assertEqual(phonex(left), phonex(right))

        # etc. (for code coverage)
        self._assert_codes((
            ('Saxon', 'S250'),
            ('Wright', 'R230'),
            ('Ai', 'A000'),
            ('Barth', 'B300'),
            ('Perry', 'B600'),
            ('Garth', 'G300'),
            ('Jerry', 'G600'),
            ('Gerry', 'G600'),
            ('Camden', 'C500'),
            ('Ganges', 'G500'),
            ('A-1', 'A000'),
        ))

        # max_length bounds tests
        self.assertEqual(phonex('Niall', max_length=-1),
                         'N4000000000000000000000000000000000000000000000000' +
                         '00000000000000')
        self.assertEqual(phonex('Niall', max_length=0), 'N400')

        # zero_pad tests
        # The first case previously repeated max_length=0; it now covers
        # max_length=-1 like the soundex, fuzzy_soundex and phonix tests.
        self.assertEqual(phonex('Niall', max_length=-1, zero_pad=False), 'N4')
        self.assertEqual(phonex('Niall', max_length=0, zero_pad=False), 'N4')
        self.assertEqual(phonex('Niall', max_length=0, zero_pad=True), 'N400')
        self.assertEqual(phonex('', max_length=4, zero_pad=False), '0')
        self.assertEqual(phonex('', max_length=4, zero_pad=True), '0000')
|
511
|
|
|
|
|
512
|
|
|
|
|
513
|
|
|
class PhonixTestCases(unittest.TestCase):
    """Test Phonix functions.

    test cases for abydos.phonetic.phonix
    """

    def _assert_codes(self, expected, *args, **kwargs):
        """Assert phonix maps every word to its expected code.

        ``expected`` is an iterable of ``(word, code)`` pairs; remaining
        arguments are forwarded to phonix after the word.
        """
        for word, code in expected:
            # Pairing the word with the result makes a failure message
            # identify which input went wrong.
            self.assertEqual((word, phonix(word, *args, **kwargs)),
                             (word, code))

    def test_phonix(self):
        """Test abydos.phonetic.phonix."""
        self.assertEqual(phonix(''), '0000')

        # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t
        self._assert_codes((
            ('Müller', 'M400'),
            ('schneider', 'S530'),
            ('fischer', 'F800'),
            ('weber', 'W100'),
            ('meyer', 'M000'),
            ('wagner', 'W250'),
            ('schulz', 'S480'),
            ('becker', 'B200'),
            ('hoffmann', 'H755'),
            ('schäfer', 'S700'),
            ('schmidt', 'S530'),
        ))

        # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t:
        # testcases from Wais Module
        self._assert_codes((
            ('computer', 'K513'),
            ('computers', 'K513'),
        ))
        self.assertEqual(phonix('computers', 5), 'K5138')
        self._assert_codes((
            ('pfeifer', 'F700'),
            ('pfeiffer', 'F700'),
            ('knight', 'N300'),
            ('night', 'N300'),
        ))

        # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t:
        # testcases from
        # http://www.cl.uni-heidelberg.de/~bormann/documents/phono/
        # They use a slightly different algorithm (first char is not included
        # in num code here)
        self._assert_codes((
            ('wait', 'W300'),
            ('weight', 'W300'),
            ('gnome', 'N500'),
            ('noam', 'N500'),
            ('rees', 'R800'),
            ('reece', 'R800'),
            ('yaeger', 'v200'),
        ))

        # http://books.google.com/books?id=xtWPI7Is9wIC&lpg=PA29&ots=DXhaL7ZkvK&dq=phonix%20gadd&pg=PA29#v=onepage&q=phonix%20gadd&f=false
        self._assert_codes((
            ('alam', 'v450'),
            ('berkpakaian', 'B212'),
            ('capaian', 'K150'),
        ))

        # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false
        self._assert_codes((
            ('peter', 'P300'),
            ('pete', 'P300'),
            ('pedro', 'P360'),
            ('stephen', 'S375'),
            ('steve', 'S370'),
            ('smith', 'S530'),
            ('smythe', 'S530'),
            ('gail', 'G400'),
            ('gayle', 'G400'),
            ('christine', 'K683'),
            ('christina', 'K683'),
            ('kristina', 'K683'),
        ))

        # max_length bounds tests
        self.assertEqual(phonix('Niall', max_length=-1), 'N4'+'0'*62)
        self.assertEqual(phonix('Niall', max_length=0), 'N400')

        # zero_pad tests
        for word, length, pad, code in (
            ('Niall', -1, False, 'N4'),
            ('Niall', 0, False, 'N4'),
            ('Niall', 0, True, 'N400'),
            ('', 4, False, '0'),
            ('', 4, True, '0000'),
        ):
            self.assertEqual(
                phonix(word, max_length=length, zero_pad=pad), code)
|
590
|
|
|
|
|
591
|
|
|
|
|
592
|
|
|
class LeinTestCases(unittest.TestCase):
    """Check the codes produced by the Lein encoder.

    test cases for abydos.phonetic.lein
    """

    def test_lein(self):
        """Test abydos.phonetic.lein."""
        # The empty string is expected to yield the all-zero code.
        self.assertEqual(lein(''), '0000')

        # https://naldc.nal.usda.gov/download/27833/PDF
        # Every name in this group is expected to encode as D450.
        d450_names = (
            'Dubose', 'Dubs', 'Dubbs', 'Doviak', 'Dubke', 'Dubus', 'Dubois',
            'Duboise', 'Doubek', 'Defigh', 'Defazio', 'Debaca', 'Dabbs',
            'Davies', 'Dubukey', 'Debus', 'Debose', 'Daves', 'Dipiazza',
            'Dobbs', 'Dobak', 'Dobis', 'Dobish', 'Doepke', 'Divish', 'Dobosh',
            'Dupois', 'Dufek', 'Duffek', 'Dupuis', 'Dupas', 'Devese', 'Devos',
            'Deveaux', 'Devies',
        )
        for surname in d450_names:
            self.assertEqual(lein(surname), 'D450')

        # Every name in this group is expected to encode as S210.
        s210_names = (
            'Sand', 'Sandau', 'Sande', 'Sandia', 'Sando', 'Sandoe', 'Sandy',
            'Santee', 'Santi', 'Santo', 'Send', 'Sennet', 'Shemoit', 'Shenot',
            'Shumate', 'Simmet', 'Simot', 'Sineath', 'Sinnott', 'Sintay',
            'Smead', 'Smeda', 'Smit',
        )
        for surname in s210_names:
            self.assertEqual(lein(surname), 'S210')

        # Additional tests from @Yomguithereal's talisman
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/lein.js
        talisman_cases = (
            ('Guillaume', 'G320'),
            ('Arlène', 'A332'),
            ('Lüdenscheidt', 'L125'),
        )
        for word, expected in talisman_cases:
            self.assertEqual(lein(word), expected)

        # Coverage: the same encoder with zero padding switched off
        unpadded_cases = (
            ('Lüdenscheidt', 'L125'),
            ('Smith', 'S21'),
        )
        for word, expected in unpadded_cases:
            self.assertEqual(lein(word, zero_pad=False), expected)
|
672
|
|
|
|
|
673
|
|
|
|
|
674
|
|
|
class PSHPSoundexTestCases(unittest.TestCase):
    """Check the codes produced by the two PSHP Soundex encoders.

    test cases for abydos.phonetic.pshp_soundex_last & pshp_soundex_first
    """

    def test_pshp_soundex_last(self):
        """Test abydos.phonetic.pshp_soundex_last."""
        # Base case
        self.assertEqual(pshp_soundex_last(''), '0000')

        # Each row is (input name, keyword options, expected code).
        # dict(...) is used for the options so the keys are native strings.
        no_opts = {}
        german = dict(german=True)
        unbounded = dict(max_length=-1)
        expectations = (
            ('JAMES', no_opts, 'J500'),
            ('JOHN', no_opts, 'J500'),
            ('PAT', no_opts, 'P300'),
            ('PETER', no_opts, 'P350'),

            ('Smith', no_opts, 'S530'),
            ('van Damme', no_opts, 'D500'),
            ('MacNeil', no_opts, 'M400'),
            ('McNeil', no_opts, 'M400'),
            ('Edwards', no_opts, 'A353'),
            ('Gin', no_opts, 'J500'),
            ('Cillian', no_opts, 'S450'),
            ('Christopher', no_opts, 'K523'),
            ('Carme', no_opts, 'K500'),
            ('Knight', no_opts, 'N230'),
            ('Phillip', no_opts, 'F410'),
            ('Wein', no_opts, 'V500'),
            ('Wagner', german, 'V255'),
            ('Pence', no_opts, 'P500'),
            ('Less', no_opts, 'L000'),
            ('Simpson', no_opts, 'S525'),
            ('Samson', no_opts, 'S250'),
            ('Lang', no_opts, 'L500'),
            ('Hagan', no_opts, 'H500'),
            ('Cartes', german, 'K500'),
            ('Kats', german, 'K000'),
            ('Schultze', german, 'S400'),
            ('Alze', german, 'A400'),
            ('Galz', german, 'G400'),
            ('Alte', german, 'A400'),
            ('Alte', unbounded, 'A43'),
            ('Altemaier', unbounded, 'A4355'),
        )
        for name, options, code in expectations:
            self.assertEqual(pshp_soundex_last(name, **options), code)

    def test_pshp_soundex_first(self):
        """Test abydos.phonetic.pshp_soundex_first."""
        # Base case
        self.assertEqual(pshp_soundex_first(''), '0000')

        # Examples given in defining paper (Hershberg, et al. 1976)
        paper_examples = (
            ('JAMES', 'J700'),
            ('JOHN', 'J500'),
            ('PAT', 'P700'),
            ('PETER', 'P300'),
        )
        for name, code in paper_examples:
            self.assertEqual(pshp_soundex_first(name), code)

        # Additions for coverage
        # Each row is (input name, keyword options, expected code).
        no_opts = {}
        german = dict(german=True)
        unbounded = dict(max_length=-1)
        coverage_cases = (
            ('Giles', no_opts, 'J400'),
            ('Cy', no_opts, 'S000'),
            ('Chris', no_opts, 'K500'),
            ('Caleb', no_opts, 'K400'),
            ('Knabe', no_opts, 'N100'),
            ('Phil', no_opts, 'F400'),
            ('Wieland', no_opts, 'V400'),
            ('Wayne', german, 'V500'),
            ('Christopher', unbounded, 'K5'),
            ('Asdaananndsjsjasd', unbounded, 'A23553223'),
            ('Asdaananndsjsjasd', no_opts, 'A235'),
        )
        for name, options, code in coverage_cases:
            self.assertEqual(pshp_soundex_first(name, **options), code)
|
744
|
|
|
|
|
745
|
|
|
|
|
746
|
|
|
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()
|
748
|
|
|
|