Completed
Branch master (87ccc1)
by Chris
10:18
created

tests.phonetic.test_phonetic_soundex.SoundexTestCases.test_refined_soundex()   B

Complexity

Conditions 1

Size

Total Lines 268
Code Lines 251

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 251
nop 1
dl 0
loc 268
rs 7
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.phonetic.test_phonetic_soundex.
20
21
This module contains unit tests for abydos.phonetic.soundex
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
from abydos.phonetic.soundex import fuzzy_soundex, lein, phonex, phonix, \
29
    pshp_soundex_first, pshp_soundex_last, refined_soundex, soundex
30
31
32
class SoundexTestCases(unittest.TestCase):
    """Test Soundex functions.

    Test cases for abydos.phonetic.soundex (default, 'special' and 'Census'
    variants) and abydos.phonetic.refined_soundex.
    """

    # Refined Soundex word groups:
    # (words, expected default code, expected code with retain_vowels=True)
    _REFINED_GROUPS = (
        # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
        (('Braz', 'Broz'), 'B195', 'B1905'),
        (('Caren', 'Caron', 'Carren', 'Charon', 'Corain', 'Coram', 'Corran',
          'Corrin', 'Corwin', 'Curran', 'Curreen', 'Currin', 'Currom',
          'Currum', 'Curwen'), 'C398', 'C30908'),
        (('Hairs', 'Hark', 'Hars', 'Hayers', 'Heers', 'Hiers'),
         'H93', 'H093'),
        (('Lambard', 'Lambart', 'Lambert', 'Lambird', 'Lampaert', 'Lampard',
          'Lampart', 'Lamperd', 'Lampert', 'Lamport', 'Limbert', 'Lombard'),
         'L78196', 'L7081096'),
        (('Nolton', 'Noulton'), 'N8768', 'N807608'),

        # http://trimc-nlp.blogspot.com/2015/03/the-soundex-algorithm.html
        (('Craig', 'Crag', 'Crejg', 'Creig', 'Craigg', 'Craug', 'Craiggg',
          'Creg', 'Cregg', 'Creag'), 'C394', 'C3904'),
        (('Greg', 'Gregg', 'Graig', 'Greig', 'Greggg', 'Groeg', 'Graj',
          'Grej', 'Grreg', 'Greag', 'Grig'), 'G494', 'G4904'),
        (('Kregg', 'Kraig', 'Krag', 'Kreig', 'Krug', 'Kreg', 'Krieg',
          'Krijg'), 'K394', 'K3904'),
    )

    # Apache Commons test cases
    # http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/RefinedSoundexTest.java?view=markup
    # (word, default code, code with retain_vowels=True,
    #  code with max_length=4 and zero_pad=True)
    _REFINED_COMMONS = (
        ('testing', 'T63684', 'T6036084', 'T636'),
        ('TESTING', 'T63684', 'T6036084', 'T636'),
        ('The', 'T6', 'T60', 'T600'),
        ('quick', 'Q53', 'Q503', 'Q530'),
        ('brown', 'B198', 'B1908', 'B198'),
        ('fox', 'F25', 'F205', 'F250'),
        ('jumped', 'J4816', 'J408106', 'J481'),
        ('over', 'O29', 'O0209', 'O290'),
        ('the', 'T6', 'T60', 'T600'),
        ('lazy', 'L75', 'L7050', 'L750'),
        ('dogs', 'D643', 'D6043', 'D643'),
    )

    def _assert_codes(self, encoder, cases, *args, **kwargs):
        """Assert encoder(word, *args, **kwargs) == expected for each case.

        The failure message names the offending word, which a bare
        assertEqual inside a loop would not reveal.

        :param encoder: the phonetic function under test
        :param cases: iterable of (word, expected result) pairs
        :param args: extra positional arguments passed to every call
        :param kwargs: extra keyword arguments passed to every call
        """
        for word, expected in cases:
            actual = encoder(word, *args, **kwargs)
            self.assertEqual(
                actual, expected,
                '{!r} encoded as {!r}, expected {!r}'.format(
                    word, actual, expected))

    def test_soundex(self):
        """Test abydos.phonetic.soundex."""
        self.assertEqual(soundex(''), '0000')

        # https://archive.org/stream/accessingindivid00moor#page/14/mode/2up
        self._assert_codes(soundex, (
            ('Euler', 'E460'), ('Gauss', 'G200'), ('Hilbert', 'H416'),
            ('Knuth', 'K530'), ('Lloyd', 'L300'), ('Lukasieicz', 'L222'),
            ('Ellery', 'E460'), ('Ghosh', 'G200'), ('Heilbronn', 'H416'),
            ('Kant', 'K530'), ('Ladd', 'L300'), ('Lissajous', 'L222'),
            ('Rogers', 'R262'), ('Rodgers', 'R326'),
        ))
        # assertNotEqual: the assertNotEquals alias is deprecated and was
        # removed in Python 3.12.
        for left, right in (('Rogers', 'Rodgers'),
                            ('Sinclair', 'St. Clair'),
                            ('Tchebysheff', 'Chebyshev')):
            self.assertNotEqual(soundex(left), soundex(right))

        # http://creativyst.com/Doc/Articles/SoundEx1/SoundEx1.htm#Related
        self._assert_codes(soundex, (
            ('Htacky', 'H320'), ('Atacky', 'A320'), ('Schmit', 'S530'),
            ('Schneider', 'S536'), ('Pfister', 'P236'),
            ('Ashcroft', 'A261'), ('Asicroft', 'A226'),
        ))

        # https://en.wikipedia.org/wiki/Soundex
        self._assert_codes(soundex, (
            ('Robert', 'R163'), ('Rupert', 'R163'), ('Rubin', 'R150'),
            ('Tymczak', 'T522'),
        ))

        # https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex
        self._assert_codes(soundex, (
            ('Peters', 'P362'), ('Peterson', 'P362'),
            ('Moskowitz', 'M232'), ('Moskovitz', 'M213'),
            ('Auerbach', 'A612'), ('Uhrbach', 'U612'),
            ('Jackson', 'J250'), ('Jackson-Jackson', 'J252'),
        ))

        # max_length tests (max_length passed positionally on purpose)
        self.assertEqual(soundex('Lincoln', 10), 'L524500000')
        self.assertEqual(soundex('Lincoln', 5), 'L5245')
        self.assertEqual(soundex('Christopher', 6), 'C62316')

        # max_length bounds tests
        self.assertEqual(soundex('Niall', max_length=-1),
                         'N4000000000000000000000000000000000000000000000000' +
                         '00000000000000')
        self.assertEqual(soundex('Niall', max_length=0), 'N400')

        # reverse tests
        self._assert_codes(soundex, (
            ('Rubin', 'N160'), ('Llyod', 'D400'), ('Lincoln', 'N425'),
            ('Knuth', 'H352'),
        ), reverse=True)

        # zero_pad tests
        self.assertEqual(soundex('Niall', max_length=-1, zero_pad=False), 'N4')
        self.assertEqual(soundex('Niall', max_length=0, zero_pad=False), 'N4')
        self.assertEqual(soundex('Niall', max_length=0, zero_pad=True), 'N400')
        self.assertEqual(soundex('', max_length=4, zero_pad=False), '0')
        self.assertEqual(soundex('', max_length=4, zero_pad=True), '0000')

    def test_soundex_special(self):
        """Test abydos.phonetic.soundex (special 1880-1910 variant method)."""
        self._assert_codes(soundex, (
            ('Ashcroft', 'A226'), ('Asicroft', 'A226'), ('AsWcroft', 'A226'),
            ('Rupert', 'R163'), ('Rubin', 'R150'),
        ), var='special')

    def test_soundex_census(self):
        """Test abydos.phonetic.soundex (Census variant method)."""
        self._assert_codes(soundex, (
            ('Vandeusen', ('V532', 'D250')),
            ('van Deusen', ('V532', 'D250')),
            ('McDonald', 'M235'),
            ('la Cruz', ('L262', 'C620')),
            ('vanDamme', ('V535', 'D500')),
        ), var='Census')

    def test_refined_soundex(self):
        """Test abydos.phonetic.refined_soundex."""
        # Expand the class-level tables into flat (word, expected) lists,
        # one list per keyword-argument configuration.
        default_cases = []
        vowel_cases = []
        padded_cases = []
        for words, code, vowel_code in self._REFINED_GROUPS:
            for word in words:
                default_cases.append((word, code))
                vowel_cases.append((word, vowel_code))
        for word, code, vowel_code, padded_code in self._REFINED_COMMONS:
            default_cases.append((word, code))
            vowel_cases.append((word, vowel_code))
            padded_cases.append((word, padded_code))

        self._assert_codes(refined_soundex, default_cases)

        # Test with retain_vowels=True
        self._assert_codes(refined_soundex, vowel_cases, retain_vowels=True)

        # length tests
        self._assert_codes(refined_soundex, padded_cases,
                           max_length=4, zero_pad=True)
        self._assert_codes(refined_soundex, (
            ('The', 'T6'), ('quick', 'Q53'), ('brown', 'B198'),
            ('fox', 'F25'), ('jumped', 'J481'), ('over', 'O29'),
            ('the', 'T6'), ('lazy', 'L75'), ('dogs', 'D643'),
        ), max_length=4)
394
395
396
class FuzzySoundexTestCases(unittest.TestCase):
    """Test Fuzzy Soundex functions.

    Test cases for abydos.phonetic.fuzzy_soundex.
    """

    def _assert_codes(self, encoder, cases, *args, **kwargs):
        """Assert encoder(word, *args, **kwargs) == expected for each case.

        The failure message names the offending word, which a bare
        assertEqual inside a loop would not reveal.

        :param encoder: the phonetic function under test
        :param cases: iterable of (word, expected result) pairs
        :param args: extra positional arguments passed to every call
        :param kwargs: extra keyword arguments passed to every call
        """
        for word, expected in cases:
            actual = encoder(word, *args, **kwargs)
            self.assertEqual(
                actual, expected,
                '{!r} encoded as {!r}, expected {!r}'.format(
                    word, actual, expected))

    def test_fuzzy_soundex(self):
        """Test abydos.phonetic.fuzzy_soundex."""
        self.assertEqual(fuzzy_soundex(''), '00000')

        # http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf
        self._assert_codes(fuzzy_soundex, (
            ('Kristen', 'K6935'), ('Krissy', 'K6900'), ('Christen', 'K6935'),
        ))

        # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false
        # (the length 4 is passed positionally on purpose)
        self._assert_codes(fuzzy_soundex, (
            ('peter', 'P360'), ('pete', 'P300'), ('pedro', 'P360'),
            ('stephen', 'S315'), ('steve', 'S310'), ('smith', 'S530'),
            ('smythe', 'S530'), ('gail', 'G400'), ('gayle', 'G400'),
            ('christine', 'K693'), ('christina', 'K693'),
            ('kristina', 'K693'),
        ), 4)

        # etc. (for code coverage)
        self._assert_codes(fuzzy_soundex, (
            ('Wight', 'W3000'), ('Hardt', 'H6000'), ('Knight', 'N3000'),
            ('Czech', 'S7000'), ('Tsech', 'S7000'), ('gnomic', 'N5900'),
            ('Wright', 'R3000'), ('Hrothgar', 'R3760'), ('Hwaet', 'W3000'),
            ('Grant', 'G6300'), ('Hart', 'H6000'),
        ))

        # max_length bounds tests
        self.assertEqual(fuzzy_soundex('Niall', max_length=-1),
                         'N4000000000000000000000000000000000000000000000000' +
                         '00000000000000')
        self.assertEqual(fuzzy_soundex('Niall', max_length=0), 'N400')

        # zero_pad tests
        self.assertEqual(fuzzy_soundex('Niall', max_length=-1,
                                       zero_pad=False), 'N4')
        self.assertEqual(fuzzy_soundex('Niall', max_length=0,
                                       zero_pad=False), 'N4')
        self.assertEqual(fuzzy_soundex('Niall', max_length=0,
                                       zero_pad=True), 'N400')
        self.assertEqual(fuzzy_soundex('', max_length=4, zero_pad=False), '0')
        self.assertEqual(fuzzy_soundex('', max_length=4, zero_pad=True),
                         '0000')
454
455
456
class PhonexTestCases(unittest.TestCase):
    """Test Phonex functions.

    Test cases for abydos.phonetic.phonex.
    """

    def _assert_codes(self, encoder, cases, *args, **kwargs):
        """Assert encoder(word, *args, **kwargs) == expected for each case.

        The failure message names the offending word, which a bare
        assertEqual inside a loop would not reveal.

        :param encoder: the phonetic function under test
        :param cases: iterable of (word, expected result) pairs
        :param args: extra positional arguments passed to every call
        :param kwargs: extra keyword arguments passed to every call
        """
        for word, expected in cases:
            actual = encoder(word, *args, **kwargs)
            self.assertEqual(
                actual, expected,
                '{!r} encoded as {!r}, expected {!r}'.format(
                    word, actual, expected))

    def test_phonex(self):
        """Test abydos.phonetic.phonex."""
        self.assertEqual(phonex(''), '0000')

        # http://homepages.cs.ncl.ac.uk/brian.randell/Genealogy/NameMatching.pdf
        self._assert_codes(phonex, (
            ('Ewell', 'A400'), ('Filp', 'F100'), ('Heames', 'A500'),
            ('Kneves', 'N100'), ('River', 'R160'), ('Corley', 'C400'),
            ('Carton', 'C350'), ('Cachpole', 'C214'),
        ))

        # Name pairs that must share the same code.
        for left, right in (('Ewell', 'Ule'),
                            ('Filp', 'Philp'),
                            ('Yule', 'Ewell'),
                            ('Heames', 'Eames'),
                            ('Kneves', 'Neves'),
                            ('River', 'Rivers'),
                            ('Corley', 'Coley'),
                            ('Carton', 'Carlton'),
                            ('Cachpole', 'Catchpole')):
            self.assertEqual(phonex(left), phonex(right))

        # etc. (for code coverage)
        self._assert_codes(phonex, (
            ('Saxon', 'S250'), ('Wright', 'R230'), ('Ai', 'A000'),
            ('Barth', 'B300'), ('Perry', 'B600'), ('Garth', 'G300'),
            ('Jerry', 'G600'), ('Gerry', 'G600'), ('Camden', 'C500'),
            ('Ganges', 'G500'), ('A-1', 'A000'),
        ))

        # max_length bounds tests
        self.assertEqual(phonex('Niall', max_length=-1),
                         'N4000000000000000000000000000000000000000000000000' +
                         '00000000000000')
        self.assertEqual(phonex('Niall', max_length=0), 'N400')

        # zero_pad tests
        # The first case uses max_length=-1, mirroring the soundex,
        # fuzzy_soundex and phonix zero_pad tests; it previously duplicated
        # the max_length=0 case on the next line.
        self.assertEqual(phonex('Niall', max_length=-1, zero_pad=False), 'N4')
        self.assertEqual(phonex('Niall', max_length=0, zero_pad=False), 'N4')
        self.assertEqual(phonex('Niall', max_length=0, zero_pad=True), 'N400')
        self.assertEqual(phonex('', max_length=4, zero_pad=False), '0')
        self.assertEqual(phonex('', max_length=4, zero_pad=True), '0000')
511
512
513
class PhonixTestCases(unittest.TestCase):
    """Test Phonix functions.

    Test cases for abydos.phonetic.phonix.
    """

    def _assert_codes(self, encoder, cases, *args, **kwargs):
        """Assert encoder(word, *args, **kwargs) == expected for each case.

        The failure message names the offending word, which a bare
        assertEqual inside a loop would not reveal.

        :param encoder: the phonetic function under test
        :param cases: iterable of (word, expected result) pairs
        :param args: extra positional arguments passed to every call
        :param kwargs: extra keyword arguments passed to every call
        """
        for word, expected in cases:
            actual = encoder(word, *args, **kwargs)
            self.assertEqual(
                actual, expected,
                '{!r} encoded as {!r}, expected {!r}'.format(
                    word, actual, expected))

    def test_phonix(self):
        """Test abydos.phonetic.phonix."""
        self.assertEqual(phonix(''), '0000')

        # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t
        self._assert_codes(phonix, (
            ('Müller', 'M400'), ('schneider', 'S530'), ('fischer', 'F800'),
            ('weber', 'W100'), ('meyer', 'M000'), ('wagner', 'W250'),
            ('schulz', 'S480'), ('becker', 'B200'), ('hoffmann', 'H755'),
            ('schäfer', 'S700'), ('schmidt', 'S530'),
        ))

        # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t:
        # testcases from Wais Module
        self._assert_codes(phonix, (
            ('computer', 'K513'), ('computers', 'K513'),
        ))
        # max_length passed positionally on purpose
        self.assertEqual(phonix('computers', 5), 'K5138')
        self._assert_codes(phonix, (
            ('pfeifer', 'F700'), ('pfeiffer', 'F700'), ('knight', 'N300'),
            ('night', 'N300'),
        ))

        # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t:
        # testcases from
        # http://www.cl.uni-heidelberg.de/~bormann/documents/phono/
        # They use a slightly different algorithm (first char is not included
        # in num code here)
        self._assert_codes(phonix, (
            ('wait', 'W300'), ('weight', 'W300'), ('gnome', 'N500'),
            ('noam', 'N500'), ('rees', 'R800'), ('reece', 'R800'),
            ('yaeger', 'v200'),
        ))

        # http://books.google.com/books?id=xtWPI7Is9wIC&lpg=PA29&ots=DXhaL7ZkvK&dq=phonix%20gadd&pg=PA29#v=onepage&q=phonix%20gadd&f=false
        self._assert_codes(phonix, (
            ('alam', 'v450'), ('berkpakaian', 'B212'), ('capaian', 'K150'),
        ))

        # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false
        self._assert_codes(phonix, (
            ('peter', 'P300'), ('pete', 'P300'), ('pedro', 'P360'),
            ('stephen', 'S375'), ('steve', 'S370'), ('smith', 'S530'),
            ('smythe', 'S530'), ('gail', 'G400'), ('gayle', 'G400'),
            ('christine', 'K683'), ('christina', 'K683'),
            ('kristina', 'K683'),
        ))

        # max_length bounds tests
        self.assertEqual(phonix('Niall', max_length=-1), 'N4' + '0' * 62)
        self.assertEqual(phonix('Niall', max_length=0), 'N400')

        # zero_pad tests
        self.assertEqual(phonix('Niall', max_length=-1, zero_pad=False), 'N4')
        self.assertEqual(phonix('Niall', max_length=0, zero_pad=False),
                         'N4')
        self.assertEqual(phonix('Niall', max_length=0, zero_pad=True),
                         'N400')
        self.assertEqual(phonix('', max_length=4, zero_pad=False), '0')
        self.assertEqual(phonix('', max_length=4, zero_pad=True), '0000')
590
591
592
class LeinTestCases(unittest.TestCase):
593
    """Test Lein functions.
594
595
    test cases for abydos.phonetic.lein
596
    """
597
598
    def test_lein(self):
599
        """Test abydos.phonetic.lein."""
600
        self.assertEqual(lein(''), '0000')
601
602
        # https://naldc.nal.usda.gov/download/27833/PDF
603
        self.assertEqual(lein('Dubose'), 'D450')
604
        self.assertEqual(lein('Dubs'), 'D450')
605
        self.assertEqual(lein('Dubbs'), 'D450')
606
        self.assertEqual(lein('Doviak'), 'D450')
607
        self.assertEqual(lein('Dubke'), 'D450')
608
        self.assertEqual(lein('Dubus'), 'D450')
609
        self.assertEqual(lein('Dubois'), 'D450')
610
        self.assertEqual(lein('Duboise'), 'D450')
611
        self.assertEqual(lein('Doubek'), 'D450')
612
        self.assertEqual(lein('Defigh'), 'D450')
613
        self.assertEqual(lein('Defazio'), 'D450')
614
        self.assertEqual(lein('Debaca'), 'D450')
615
        self.assertEqual(lein('Dabbs'), 'D450')
616
        self.assertEqual(lein('Davies'), 'D450')
617
        self.assertEqual(lein('Dubukey'), 'D450')
618
        self.assertEqual(lein('Debus'), 'D450')
619
        self.assertEqual(lein('Debose'), 'D450')
620
        self.assertEqual(lein('Daves'), 'D450')
621
        self.assertEqual(lein('Dipiazza'), 'D450')
622
        self.assertEqual(lein('Dobbs'), 'D450')
623
        self.assertEqual(lein('Dobak'), 'D450')
624
        self.assertEqual(lein('Dobis'), 'D450')
625
        self.assertEqual(lein('Dobish'), 'D450')
626
        self.assertEqual(lein('Doepke'), 'D450')
627
        self.assertEqual(lein('Divish'), 'D450')
628
        self.assertEqual(lein('Dobosh'), 'D450')
629
        self.assertEqual(lein('Dupois'), 'D450')
630
        self.assertEqual(lein('Dufek'), 'D450')
631
        self.assertEqual(lein('Duffek'), 'D450')
632
        self.assertEqual(lein('Dupuis'), 'D450')
633
        self.assertEqual(lein('Dupas'), 'D450')
634
        self.assertEqual(lein('Devese'), 'D450')
635
        self.assertEqual(lein('Devos'), 'D450')
636
        self.assertEqual(lein('Deveaux'), 'D450')
637
        self.assertEqual(lein('Devies'), 'D450')
638
639
        self.assertEqual(lein('Sand'), 'S210')
640
        self.assertEqual(lein('Sandau'), 'S210')
641
        self.assertEqual(lein('Sande'), 'S210')
642
        self.assertEqual(lein('Sandia'), 'S210')
643
        self.assertEqual(lein('Sando'), 'S210')
644
        self.assertEqual(lein('Sandoe'), 'S210')
645
        self.assertEqual(lein('Sandy'), 'S210')
646
        self.assertEqual(lein('Santee'), 'S210')
647
        self.assertEqual(lein('Santi'), 'S210')
648
        self.assertEqual(lein('Santo'), 'S210')
649
        self.assertEqual(lein('Send'), 'S210')
650
        self.assertEqual(lein('Sennet'), 'S210')
651
        self.assertEqual(lein('Shemoit'), 'S210')
652
        self.assertEqual(lein('Shenot'), 'S210')
653
        self.assertEqual(lein('Shumate'), 'S210')
654
        self.assertEqual(lein('Simmet'), 'S210')
655
        self.assertEqual(lein('Simot'), 'S210')
656
        self.assertEqual(lein('Sineath'), 'S210')
657
        self.assertEqual(lein('Sinnott'), 'S210')
658
        self.assertEqual(lein('Sintay'), 'S210')
659
        self.assertEqual(lein('Smead'), 'S210')
660
        self.assertEqual(lein('Smeda'), 'S210')
661
        self.assertEqual(lein('Smit'), 'S210')
662
663
        # Additional tests from @Yomguithereal's talisman
664
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/lein.js
665
        self.assertEqual(lein('Guillaume'), 'G320')
666
        self.assertEqual(lein('Arlène'), 'A332')
667
        self.assertEqual(lein('Lüdenscheidt'), 'L125')
668
669
        # Coverage
670
        self.assertEqual(lein('Lüdenscheidt', zero_pad=False), 'L125')
671
        self.assertEqual(lein('Smith', zero_pad=False), 'S21')
672
673
674
class PSHPSoundexTestCases(unittest.TestCase):
675
    """Test PSHP Soundex functions.
676
677
    test cases for abydos.phonetic.pshp_soundex_last & pshp_soundex_first
678
    """
679
680
    def test_pshp_soundex_last(self):
681
        """Test abydos.phonetic.pshp_soundex_last."""
682
        # Base case
683
        self.assertEqual(pshp_soundex_last(''), '0000')
684
685
        self.assertEqual(pshp_soundex_last('JAMES'), 'J500')
686
        self.assertEqual(pshp_soundex_last('JOHN'), 'J500')
687
        self.assertEqual(pshp_soundex_last('PAT'), 'P300')
688
        self.assertEqual(pshp_soundex_last('PETER'), 'P350')
689
690
        self.assertEqual(pshp_soundex_last('Smith'), 'S530')
691
        self.assertEqual(pshp_soundex_last('van Damme'), 'D500')
692
        self.assertEqual(pshp_soundex_last('MacNeil'), 'M400')
693
        self.assertEqual(pshp_soundex_last('McNeil'), 'M400')
694
        self.assertEqual(pshp_soundex_last('Edwards'), 'A353')
695
        self.assertEqual(pshp_soundex_last('Gin'), 'J500')
696
        self.assertEqual(pshp_soundex_last('Cillian'), 'S450')
697
        self.assertEqual(pshp_soundex_last('Christopher'), 'K523')
698
        self.assertEqual(pshp_soundex_last('Carme'), 'K500')
699
        self.assertEqual(pshp_soundex_last('Knight'), 'N230')
700
        self.assertEqual(pshp_soundex_last('Phillip'), 'F410')
701
        self.assertEqual(pshp_soundex_last('Wein'), 'V500')
702
        self.assertEqual(pshp_soundex_last('Wagner', german=True), 'V255')
703
        self.assertEqual(pshp_soundex_last('Pence'), 'P500')
704
        self.assertEqual(pshp_soundex_last('Less'), 'L000')
705
        self.assertEqual(pshp_soundex_last('Simpson'), 'S525')
706
        self.assertEqual(pshp_soundex_last('Samson'), 'S250')
707
        self.assertEqual(pshp_soundex_last('Lang'), 'L500')
708
        self.assertEqual(pshp_soundex_last('Hagan'), 'H500')
709
        self.assertEqual(pshp_soundex_last('Cartes', german=True), 'K500')
710
        self.assertEqual(pshp_soundex_last('Kats', german=True), 'K000')
711
        self.assertEqual(pshp_soundex_last('Schultze', german=True), 'S400')
712
        self.assertEqual(pshp_soundex_last('Alze', german=True), 'A400')
713
        self.assertEqual(pshp_soundex_last('Galz', german=True), 'G400')
714
        self.assertEqual(pshp_soundex_last('Alte', german=True), 'A400')
715
        self.assertEqual(pshp_soundex_last('Alte', max_length=-1), 'A43')
716
        self.assertEqual(pshp_soundex_last('Altemaier', max_length=-1),
717
                         'A4355')
718
719
    def test_pshp_soundex_first(self):
720
        """Test abydos.phonetic.pshp_soundex_first."""
721
        # Base case
722
        self.assertEqual(pshp_soundex_first(''), '0000')
723
724
        # Examples given in defining paper (Hershberg, et al. 1976)
725
        self.assertEqual(pshp_soundex_first('JAMES'), 'J700')
726
        self.assertEqual(pshp_soundex_first('JOHN'), 'J500')
727
        self.assertEqual(pshp_soundex_first('PAT'), 'P700')
728
        self.assertEqual(pshp_soundex_first('PETER'), 'P300')
729
730
        # Additions for coverage
731
        self.assertEqual(pshp_soundex_first('Giles'), 'J400')
732
        self.assertEqual(pshp_soundex_first('Cy'), 'S000')
733
        self.assertEqual(pshp_soundex_first('Chris'), 'K500')
734
        self.assertEqual(pshp_soundex_first('Caleb'), 'K400')
735
        self.assertEqual(pshp_soundex_first('Knabe'), 'N100')
736
        self.assertEqual(pshp_soundex_first('Phil'), 'F400')
737
        self.assertEqual(pshp_soundex_first('Wieland'), 'V400')
738
        self.assertEqual(pshp_soundex_first('Wayne', german=True), 'V500')
739
        self.assertEqual(pshp_soundex_first('Christopher', max_length=-1),
740
                         'K5')
741
        self.assertEqual(pshp_soundex_first('Asdaananndsjsjasd',
742
                                            max_length=-1), 'A23553223')
743
        self.assertEqual(pshp_soundex_first('Asdaananndsjsjasd'), 'A235')
744
745
746
# Run this module's test cases only when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
748