Total Complexity | 124 |
Total Lines | 6533 |
Duplicated Lines | 1.1 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like tests.test_phonetic often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
2 | |||
3 | # Copyright 2014-2018 by Christopher C. Little. |
||
4 | # This file is part of Abydos. |
||
5 | # |
||
6 | # Abydos is free software: you can redistribute it and/or modify |
||
7 | # it under the terms of the GNU General Public License as published by |
||
8 | # the Free Software Foundation, either version 3 of the License, or |
||
9 | # (at your option) any later version. |
||
10 | # |
||
11 | # Abydos is distributed in the hope that it will be useful, |
||
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
14 | # GNU General Public License for more details. |
||
15 | # |
||
16 | # You should have received a copy of the GNU General Public License |
||
17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
||
18 | |||
19 | """abydos.tests.test_phonetic. |
||
20 | |||
21 | This module contains unit tests for abydos.phonetic |
||
22 | """ |
||
23 | |||
24 | from __future__ import unicode_literals |
||
25 | |||
26 | import codecs |
||
27 | import math |
||
28 | import os |
||
29 | import unittest |
||
30 | from random import random |
||
31 | |||
32 | from abydos._bm import _bm_apply_rule_if_compat, _bm_expand_alternates, \ |
||
33 | _bm_language, _bm_normalize_lang_attrs, _bm_phonetic_number, \ |
||
34 | _bm_remove_dupes |
||
35 | from abydos._bmdata import L_ANY, L_CYRILLIC, L_CZECH, L_DUTCH, L_ENGLISH, \ |
||
36 | L_FRENCH, L_GERMAN, L_GREEK, L_GREEKLATIN, L_HEBREW, L_HUNGARIAN, \ |
||
37 | L_ITALIAN, L_LATVIAN, L_POLISH, L_PORTUGUESE, L_ROMANIAN, L_SPANISH, \ |
||
38 | L_TURKISH |
||
39 | from abydos.phonetic import alpha_sis, bmpm, caverphone, davidson, \ |
||
40 | dm_soundex, dolby, double_metaphone, eudex, fonem, fuzzy_soundex, haase_phonetik, \ |
||
41 | henry_early, koelner_phonetik, koelner_phonetik_alpha, koelner_phonetik_num_to_alpha, \ |
||
42 | lein, metaphone, mra, norphone, nysiis, onca, parmar_kumbharana, phonem, \ |
||
43 | phonet, phonetic_spanish, phonex, phonix, pshp_soundex_first, pshp_soundex_last, \ |
||
44 | refined_soundex, reth_schek_phonetik, roger_root, russell_index, russell_index_alpha, \ |
||
45 | russell_index_num_to_alpha, sfinxbis, sound_d, soundex, soundex_br, \ |
||
46 | spanish_metaphone, spfc, statistics_canada |
||
47 | |||
48 | from six import text_type |
||
49 | |||
50 | TESTDIR = os.path.dirname(__file__) |
||
51 | |||
52 | EXTREME_TEST = False # Set to True to test EVERY single case (NB: takes hours) |
||
53 | ALLOW_RANDOM = True # Set to False to skip all random tests |
||
54 | |||
55 | if not EXTREME_TEST and os.path.isfile(TESTDIR + '/EXTREME_TEST'): |
||
56 | # EXTREME_TEST file detected -- switching to EXTREME_TEST mode... |
||
57 | EXTREME_TEST = True |
||
58 | |||
59 | |||
60 | def one_in(inverse_probability): |
||
61 | """Return whether to run a test. |
||
62 | |||
63 | Return True if: |
||
64 | EXTREME_TEST is True |
||
65 | OR |
||
66 | (ALLOW_RANDOM is False |
||
67 | AND |
||
68 | random.random() * inverse_probability < 1 |
||
69 | Otherwise return False |
||
70 | """ |
||
71 | if EXTREME_TEST: |
||
72 | return True |
||
73 | elif ALLOW_RANDOM and random() * inverse_probability < 1: # noqa: S311 |
||
74 | return True |
||
75 | else: |
||
76 | return False |
||
77 | |||
78 | |||
79 | class RussellIndexTestCases(unittest.TestCase): |
||
80 | """Test Russel Index functions. |
||
81 | |||
82 | test cases for abydos.phonetic.russell_index, |
||
83 | .russell_index_num_to_alpha, & .russell_index_alpha |
||
84 | """ |
||
85 | |||
86 | def test_russel_index(self): |
||
87 | """Test abydos.phonetic.russell_index.""" |
||
88 | self.assertTrue(math.isnan(russell_index(''))) |
||
89 | self.assertTrue(math.isnan(russell_index('H'))) |
||
90 | self.assertEqual(russell_index('Hoppa'), 12) |
||
91 | self.assertEqual(russell_index('Hopley'), 125) |
||
92 | self.assertEqual(russell_index('Highfield'), 1254) |
||
93 | self.assertEqual(russell_index('Wright'), 814) |
||
94 | self.assertEqual(russell_index('Carter'), 31848) |
||
95 | self.assertEqual(russell_index('Hopf'), 12) |
||
96 | self.assertEqual(russell_index('Hay'), 1) |
||
97 | self.assertEqual(russell_index('Haas'), 1) |
||
98 | self.assertEqual(russell_index('Meyers'), 618) |
||
99 | self.assertEqual(russell_index('Myers'), 618) |
||
100 | self.assertEqual(russell_index('Meyer'), 618) |
||
101 | self.assertEqual(russell_index('Myer'), 618) |
||
102 | self.assertEqual(russell_index('Mack'), 613) |
||
103 | self.assertEqual(russell_index('Knack'), 3713) |
||
104 | |||
105 | def test_russel_index_n2a(self): |
||
106 | """Test abydos.phonetic.russell_index_num_to_alpha.""" |
||
107 | self.assertEqual(russell_index_num_to_alpha(0), '') |
||
108 | self.assertEqual(russell_index_num_to_alpha(''), '') |
||
109 | self.assertEqual(russell_index_num_to_alpha(float('NaN')), '') |
||
110 | self.assertEqual(russell_index_num_to_alpha(123456789), 'ABCDLMNR') |
||
111 | self.assertEqual(russell_index_num_to_alpha('0123456789'), 'ABCDLMNR') |
||
112 | |||
113 | def test_russel_index_alpha(self): |
||
114 | """Test abydos.phonetic.russell_index_alpha.""" |
||
115 | self.assertEqual(russell_index_alpha(''), '') |
||
116 | self.assertEqual(russell_index_alpha('H'), '') |
||
117 | self.assertEqual(russell_index_alpha('Hoppa'), 'AB') |
||
118 | self.assertEqual(russell_index_alpha('Hopley'), 'ABL') |
||
119 | self.assertEqual(russell_index_alpha('Highfield'), 'ABLD') |
||
120 | self.assertEqual(russell_index_alpha('Wright'), 'RAD') |
||
121 | self.assertEqual(russell_index_alpha('Carter'), 'CARDR') |
||
122 | self.assertEqual(russell_index_alpha('Hopf'), 'AB') |
||
123 | self.assertEqual(russell_index_alpha('Hay'), 'A') |
||
124 | self.assertEqual(russell_index_alpha('Haas'), 'A') |
||
125 | self.assertEqual(russell_index_alpha('Meyers'), 'MAR') |
||
126 | self.assertEqual(russell_index_alpha('Myers'), 'MAR') |
||
127 | self.assertEqual(russell_index_alpha('Meyer'), 'MAR') |
||
128 | self.assertEqual(russell_index_alpha('Myer'), 'MAR') |
||
129 | self.assertEqual(russell_index_alpha('Mack'), 'MAC') |
||
130 | self.assertEqual(russell_index_alpha('Knack'), 'CNAC') |
||
131 | |||
132 | |||
133 | class SoundexTestCases(unittest.TestCase): |
||
134 | """Test Soundex functions. |
||
135 | |||
136 | test cases for abydos.phonetic.soundex, .refined_soundex, |
||
137 | & .dm_soundex |
||
138 | """ |
||
139 | |||
140 | def test_soundex(self): |
||
141 | """Test abydos.phonetic.soundex.""" |
||
142 | self.assertEqual(soundex(''), '0000') |
||
143 | |||
144 | # https://archive.org/stream/accessingindivid00moor#page/14/mode/2up |
||
145 | self.assertEqual(soundex('Euler'), 'E460') |
||
146 | self.assertEqual(soundex('Gauss'), 'G200') |
||
147 | self.assertEqual(soundex('Hilbert'), 'H416') |
||
148 | self.assertEqual(soundex('Knuth'), 'K530') |
||
149 | self.assertEqual(soundex('Lloyd'), 'L300') |
||
150 | self.assertEqual(soundex('Lukasieicz'), 'L222') |
||
151 | self.assertEqual(soundex('Ellery'), 'E460') |
||
152 | self.assertEqual(soundex('Ghosh'), 'G200') |
||
153 | self.assertEqual(soundex('Heilbronn'), 'H416') |
||
154 | self.assertEqual(soundex('Kant'), 'K530') |
||
155 | self.assertEqual(soundex('Ladd'), 'L300') |
||
156 | self.assertEqual(soundex('Lissajous'), 'L222') |
||
157 | self.assertEqual(soundex('Rogers'), 'R262') |
||
158 | self.assertEqual(soundex('Rodgers'), 'R326') |
||
159 | self.assertNotEquals(soundex('Rogers'), soundex('Rodgers')) |
||
160 | self.assertNotEquals(soundex('Sinclair'), soundex('St. Clair')) |
||
161 | self.assertNotEquals(soundex('Tchebysheff'), soundex('Chebyshev')) |
||
162 | |||
163 | # http://creativyst.com/Doc/Articles/SoundEx1/SoundEx1.htm#Related |
||
164 | self.assertEqual(soundex('Htacky'), 'H320') |
||
165 | self.assertEqual(soundex('Atacky'), 'A320') |
||
166 | self.assertEqual(soundex('Schmit'), 'S530') |
||
167 | self.assertEqual(soundex('Schneider'), 'S536') |
||
168 | self.assertEqual(soundex('Pfister'), 'P236') |
||
169 | self.assertEqual(soundex('Ashcroft'), 'A261') |
||
170 | self.assertEqual(soundex('Asicroft'), 'A226') |
||
171 | |||
172 | # https://en.wikipedia.org/wiki/Soundex |
||
173 | self.assertEqual(soundex('Robert'), 'R163') |
||
174 | self.assertEqual(soundex('Rupert'), 'R163') |
||
175 | self.assertEqual(soundex('Rubin'), 'R150') |
||
176 | self.assertEqual(soundex('Tymczak'), 'T522') |
||
177 | |||
178 | # https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex |
||
179 | self.assertEqual(soundex('Peters'), 'P362') |
||
180 | self.assertEqual(soundex('Peterson'), 'P362') |
||
181 | self.assertEqual(soundex('Moskowitz'), 'M232') |
||
182 | self.assertEqual(soundex('Moskovitz'), 'M213') |
||
183 | self.assertEqual(soundex('Auerbach'), 'A612') |
||
184 | self.assertEqual(soundex('Uhrbach'), 'U612') |
||
185 | self.assertEqual(soundex('Jackson'), 'J250') |
||
186 | self.assertEqual(soundex('Jackson-Jackson'), 'J252') |
||
187 | |||
188 | # maxlength tests |
||
189 | self.assertEqual(soundex('Lincoln', 10), 'L524500000') |
||
190 | self.assertEqual(soundex('Lincoln', 5), 'L5245') |
||
191 | self.assertEqual(soundex('Christopher', 6), 'C62316') |
||
192 | |||
193 | # maxlength bounds tests |
||
194 | self.assertEqual(soundex('Niall', maxlength=float('inf')), |
||
195 | 'N4000000000000000000000000000000000000000000000000' + |
||
196 | '00000000000000') |
||
197 | self.assertEqual(soundex('Niall', maxlength=None), |
||
198 | 'N4000000000000000000000000000000000000000000000000' + |
||
199 | '00000000000000') |
||
200 | self.assertEqual(soundex('Niall', maxlength=0), 'N400') |
||
201 | |||
202 | # reverse tests |
||
203 | self.assertEqual(soundex('Rubin', reverse=True), 'N160') |
||
204 | self.assertEqual(soundex('Llyod', reverse=True), 'D400') |
||
205 | self.assertEqual(soundex('Lincoln', reverse=True), 'N425') |
||
206 | self.assertEqual(soundex('Knuth', reverse=True), 'H352') |
||
207 | |||
208 | # zero_pad tests |
||
209 | self.assertEqual(soundex('Niall', maxlength=float('inf'), |
||
210 | zero_pad=False), 'N4') |
||
211 | self.assertEqual(soundex('Niall', maxlength=None, |
||
212 | zero_pad=False), 'N4') |
||
213 | self.assertEqual(soundex('Niall', maxlength=0, zero_pad=False), 'N4') |
||
214 | self.assertEqual(soundex('Niall', maxlength=0, zero_pad=True), 'N400') |
||
215 | self.assertEqual(soundex('', maxlength=4, zero_pad=False), '0') |
||
216 | self.assertEqual(soundex('', maxlength=4, zero_pad=True), '0000') |
||
217 | |||
218 | def test_soundex_special(self): |
||
219 | """Test abydos.phonetic.soundex (special 1880-1910 variant method).""" |
||
220 | self.assertEqual(soundex('Ashcroft', var='special'), 'A226') |
||
221 | self.assertEqual(soundex('Asicroft', var='special'), 'A226') |
||
222 | self.assertEqual(soundex('AsWcroft', var='special'), 'A226') |
||
223 | self.assertEqual(soundex('Rupert', var='special'), 'R163') |
||
224 | self.assertEqual(soundex('Rubin', var='special'), 'R150') |
||
225 | |||
226 | def test_soundex_census(self): |
||
227 | """Test abydos.phonetic.soundex (Census variant method).""" |
||
228 | self.assertEqual(soundex('Vandeusen', var='Census'), ('V532', 'D250')) |
||
229 | self.assertEqual(soundex('van Deusen', var='Census'), ('V532', 'D250')) |
||
230 | self.assertEqual(soundex('McDonald', var='Census'), 'M235') |
||
231 | self.assertEqual(soundex('la Cruz', var='Census'), ('L262', 'C620')) |
||
232 | self.assertEqual(soundex('vanDamme', var='Census'), ('V535', 'D500')) |
||
233 | |||
234 | def test_refined_soundex(self): |
||
235 | """Test abydos.phonetic.refined_soundex.""" |
||
236 | # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html |
||
237 | self.assertEqual(refined_soundex('Braz'), 'B195') |
||
238 | self.assertEqual(refined_soundex('Broz'), 'B195') |
||
239 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
240 | self.assertEqual(refined_soundex('Caron'), 'C398') |
||
241 | self.assertEqual(refined_soundex('Carren'), 'C398') |
||
242 | self.assertEqual(refined_soundex('Charon'), 'C398') |
||
243 | self.assertEqual(refined_soundex('Corain'), 'C398') |
||
244 | self.assertEqual(refined_soundex('Coram'), 'C398') |
||
245 | self.assertEqual(refined_soundex('Corran'), 'C398') |
||
246 | self.assertEqual(refined_soundex('Corrin'), 'C398') |
||
247 | self.assertEqual(refined_soundex('Corwin'), 'C398') |
||
248 | self.assertEqual(refined_soundex('Curran'), 'C398') |
||
249 | self.assertEqual(refined_soundex('Curreen'), 'C398') |
||
250 | self.assertEqual(refined_soundex('Currin'), 'C398') |
||
251 | self.assertEqual(refined_soundex('Currom'), 'C398') |
||
252 | self.assertEqual(refined_soundex('Currum'), 'C398') |
||
253 | self.assertEqual(refined_soundex('Curwen'), 'C398') |
||
254 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
255 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
256 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
257 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
258 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
259 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
260 | self.assertEqual(refined_soundex('Caren'), 'C398') |
||
261 | self.assertEqual(refined_soundex('Hairs'), 'H93') |
||
262 | self.assertEqual(refined_soundex('Hark'), 'H93') |
||
263 | self.assertEqual(refined_soundex('Hars'), 'H93') |
||
264 | self.assertEqual(refined_soundex('Hayers'), 'H93') |
||
265 | self.assertEqual(refined_soundex('Heers'), 'H93') |
||
266 | self.assertEqual(refined_soundex('Hiers'), 'H93') |
||
267 | self.assertEqual(refined_soundex('Lambard'), 'L78196') |
||
268 | self.assertEqual(refined_soundex('Lambart'), 'L78196') |
||
269 | self.assertEqual(refined_soundex('Lambert'), 'L78196') |
||
270 | self.assertEqual(refined_soundex('Lambird'), 'L78196') |
||
271 | self.assertEqual(refined_soundex('Lampaert'), 'L78196') |
||
272 | self.assertEqual(refined_soundex('Lampard'), 'L78196') |
||
273 | self.assertEqual(refined_soundex('Lampart'), 'L78196') |
||
274 | self.assertEqual(refined_soundex('Lamperd'), 'L78196') |
||
275 | self.assertEqual(refined_soundex('Lampert'), 'L78196') |
||
276 | self.assertEqual(refined_soundex('Lamport'), 'L78196') |
||
277 | self.assertEqual(refined_soundex('Limbert'), 'L78196') |
||
278 | self.assertEqual(refined_soundex('Lombard'), 'L78196') |
||
279 | self.assertEqual(refined_soundex('Nolton'), 'N8768') |
||
280 | self.assertEqual(refined_soundex('Noulton'), 'N8768') |
||
281 | |||
282 | # http://trimc-nlp.blogspot.com/2015/03/the-soundex-algorithm.html |
||
283 | self.assertEqual(refined_soundex('Craig'), 'C394') |
||
284 | self.assertEqual(refined_soundex('Crag'), 'C394') |
||
285 | self.assertEqual(refined_soundex('Crejg'), 'C394') |
||
286 | self.assertEqual(refined_soundex('Creig'), 'C394') |
||
287 | self.assertEqual(refined_soundex('Craigg'), 'C394') |
||
288 | self.assertEqual(refined_soundex('Craug'), 'C394') |
||
289 | self.assertEqual(refined_soundex('Craiggg'), 'C394') |
||
290 | self.assertEqual(refined_soundex('Creg'), 'C394') |
||
291 | self.assertEqual(refined_soundex('Cregg'), 'C394') |
||
292 | self.assertEqual(refined_soundex('Creag'), 'C394') |
||
293 | self.assertEqual(refined_soundex('Greg'), 'G494') |
||
294 | self.assertEqual(refined_soundex('Gregg'), 'G494') |
||
295 | self.assertEqual(refined_soundex('Graig'), 'G494') |
||
296 | self.assertEqual(refined_soundex('Greig'), 'G494') |
||
297 | self.assertEqual(refined_soundex('Greggg'), 'G494') |
||
298 | self.assertEqual(refined_soundex('Groeg'), 'G494') |
||
299 | self.assertEqual(refined_soundex('Graj'), 'G494') |
||
300 | self.assertEqual(refined_soundex('Grej'), 'G494') |
||
301 | self.assertEqual(refined_soundex('Grreg'), 'G494') |
||
302 | self.assertEqual(refined_soundex('Greag'), 'G494') |
||
303 | self.assertEqual(refined_soundex('Grig'), 'G494') |
||
304 | self.assertEqual(refined_soundex('Kregg'), 'K394') |
||
305 | self.assertEqual(refined_soundex('Kraig'), 'K394') |
||
306 | self.assertEqual(refined_soundex('Krag'), 'K394') |
||
307 | self.assertEqual(refined_soundex('Kreig'), 'K394') |
||
308 | self.assertEqual(refined_soundex('Krug'), 'K394') |
||
309 | self.assertEqual(refined_soundex('Kreg'), 'K394') |
||
310 | self.assertEqual(refined_soundex('Krieg'), 'K394') |
||
311 | self.assertEqual(refined_soundex('Krijg'), 'K394') |
||
312 | |||
313 | # Apache Commons test cases |
||
314 | # http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/RefinedSoundexTest.java?view=markup |
||
315 | self.assertEqual(refined_soundex('testing'), 'T63684') |
||
316 | self.assertEqual(refined_soundex('TESTING'), 'T63684') |
||
317 | self.assertEqual(refined_soundex('The'), 'T6') |
||
318 | self.assertEqual(refined_soundex('quick'), 'Q53') |
||
319 | self.assertEqual(refined_soundex('brown'), 'B198') |
||
320 | self.assertEqual(refined_soundex('fox'), 'F25') |
||
321 | self.assertEqual(refined_soundex('jumped'), 'J4816') |
||
322 | self.assertEqual(refined_soundex('over'), 'O29') |
||
323 | self.assertEqual(refined_soundex('the'), 'T6') |
||
324 | self.assertEqual(refined_soundex('lazy'), 'L75') |
||
325 | self.assertEqual(refined_soundex('dogs'), 'D643') |
||
326 | |||
327 | # Test with retain_vowels=True |
||
328 | # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html |
||
329 | self.assertEqual(refined_soundex('Braz', retain_vowels=True), 'B1905') |
||
330 | self.assertEqual(refined_soundex('Broz', retain_vowels=True), 'B1905') |
||
331 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
332 | 'C30908') |
||
333 | self.assertEqual(refined_soundex('Caron', retain_vowels=True), |
||
334 | 'C30908') |
||
335 | self.assertEqual(refined_soundex('Carren', retain_vowels=True), |
||
336 | 'C30908') |
||
337 | self.assertEqual(refined_soundex('Charon', retain_vowels=True), |
||
338 | 'C30908') |
||
339 | self.assertEqual(refined_soundex('Corain', retain_vowels=True), |
||
340 | 'C30908') |
||
341 | self.assertEqual(refined_soundex('Coram', retain_vowels=True), |
||
342 | 'C30908') |
||
343 | self.assertEqual(refined_soundex('Corran', retain_vowels=True), |
||
344 | 'C30908') |
||
345 | self.assertEqual(refined_soundex('Corrin', retain_vowels=True), |
||
346 | 'C30908') |
||
347 | self.assertEqual(refined_soundex('Corwin', retain_vowels=True), |
||
348 | 'C30908') |
||
349 | self.assertEqual(refined_soundex('Curran', retain_vowels=True), |
||
350 | 'C30908') |
||
351 | self.assertEqual(refined_soundex('Curreen', retain_vowels=True), |
||
352 | 'C30908') |
||
353 | self.assertEqual(refined_soundex('Currin', retain_vowels=True), |
||
354 | 'C30908') |
||
355 | self.assertEqual(refined_soundex('Currom', retain_vowels=True), |
||
356 | 'C30908') |
||
357 | self.assertEqual(refined_soundex('Currum', retain_vowels=True), |
||
358 | 'C30908') |
||
359 | self.assertEqual(refined_soundex('Curwen', retain_vowels=True), |
||
360 | 'C30908') |
||
361 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
362 | 'C30908') |
||
363 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
364 | 'C30908') |
||
365 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
366 | 'C30908') |
||
367 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
368 | 'C30908') |
||
369 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
370 | 'C30908') |
||
371 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
372 | 'C30908') |
||
373 | self.assertEqual(refined_soundex('Caren', retain_vowels=True), |
||
374 | 'C30908') |
||
375 | self.assertEqual(refined_soundex('Hairs', retain_vowels=True), 'H093') |
||
376 | self.assertEqual(refined_soundex('Hark', retain_vowels=True), 'H093') |
||
377 | self.assertEqual(refined_soundex('Hars', retain_vowels=True), 'H093') |
||
378 | self.assertEqual(refined_soundex('Hayers', retain_vowels=True), 'H093') |
||
379 | self.assertEqual(refined_soundex('Heers', retain_vowels=True), 'H093') |
||
380 | self.assertEqual(refined_soundex('Hiers', retain_vowels=True), 'H093') |
||
381 | self.assertEqual(refined_soundex('Lambard', retain_vowels=True), |
||
382 | 'L7081096') |
||
383 | self.assertEqual(refined_soundex('Lambart', retain_vowels=True), |
||
384 | 'L7081096') |
||
385 | self.assertEqual(refined_soundex('Lambert', retain_vowels=True), |
||
386 | 'L7081096') |
||
387 | self.assertEqual(refined_soundex('Lambird', retain_vowels=True), |
||
388 | 'L7081096') |
||
389 | self.assertEqual(refined_soundex('Lampaert', retain_vowels=True), |
||
390 | 'L7081096') |
||
391 | self.assertEqual(refined_soundex('Lampard', retain_vowels=True), |
||
392 | 'L7081096') |
||
393 | self.assertEqual(refined_soundex('Lampart', retain_vowels=True), |
||
394 | 'L7081096') |
||
395 | self.assertEqual(refined_soundex('Lamperd', retain_vowels=True), |
||
396 | 'L7081096') |
||
397 | self.assertEqual(refined_soundex('Lampert', retain_vowels=True), |
||
398 | 'L7081096') |
||
399 | self.assertEqual(refined_soundex('Lamport', retain_vowels=True), |
||
400 | 'L7081096') |
||
401 | self.assertEqual(refined_soundex('Limbert', retain_vowels=True), |
||
402 | 'L7081096') |
||
403 | self.assertEqual(refined_soundex('Lombard', retain_vowels=True), |
||
404 | 'L7081096') |
||
405 | self.assertEqual(refined_soundex('Nolton', retain_vowels=True), |
||
406 | 'N807608') |
||
407 | self.assertEqual(refined_soundex('Noulton', retain_vowels=True), |
||
408 | 'N807608') |
||
409 | |||
410 | # http://trimc-nlp.blogspot.com/2015/03/the-soundex-algorithm.html |
||
411 | self.assertEqual(refined_soundex('Craig', retain_vowels=True), 'C3904') |
||
412 | self.assertEqual(refined_soundex('Crag', retain_vowels=True), 'C3904') |
||
413 | self.assertEqual(refined_soundex('Crejg', retain_vowels=True), 'C3904') |
||
414 | self.assertEqual(refined_soundex('Creig', retain_vowels=True), 'C3904') |
||
415 | self.assertEqual(refined_soundex('Craigg', retain_vowels=True), |
||
416 | 'C3904') |
||
417 | self.assertEqual(refined_soundex('Craug', retain_vowels=True), 'C3904') |
||
418 | self.assertEqual(refined_soundex('Craiggg', retain_vowels=True), |
||
419 | 'C3904') |
||
420 | self.assertEqual(refined_soundex('Creg', retain_vowels=True), 'C3904') |
||
421 | self.assertEqual(refined_soundex('Cregg', retain_vowels=True), 'C3904') |
||
422 | self.assertEqual(refined_soundex('Creag', retain_vowels=True), 'C3904') |
||
423 | self.assertEqual(refined_soundex('Greg', retain_vowels=True), 'G4904') |
||
424 | self.assertEqual(refined_soundex('Gregg', retain_vowels=True), 'G4904') |
||
425 | self.assertEqual(refined_soundex('Graig', retain_vowels=True), 'G4904') |
||
426 | self.assertEqual(refined_soundex('Greig', retain_vowels=True), 'G4904') |
||
427 | self.assertEqual(refined_soundex('Greggg', retain_vowels=True), |
||
428 | 'G4904') |
||
429 | self.assertEqual(refined_soundex('Groeg', retain_vowels=True), 'G4904') |
||
430 | self.assertEqual(refined_soundex('Graj', retain_vowels=True), 'G4904') |
||
431 | self.assertEqual(refined_soundex('Grej', retain_vowels=True), 'G4904') |
||
432 | self.assertEqual(refined_soundex('Grreg', retain_vowels=True), 'G4904') |
||
433 | self.assertEqual(refined_soundex('Greag', retain_vowels=True), 'G4904') |
||
434 | self.assertEqual(refined_soundex('Grig', retain_vowels=True), 'G4904') |
||
435 | self.assertEqual(refined_soundex('Kregg', retain_vowels=True), 'K3904') |
||
436 | self.assertEqual(refined_soundex('Kraig', retain_vowels=True), 'K3904') |
||
437 | self.assertEqual(refined_soundex('Krag', retain_vowels=True), 'K3904') |
||
438 | self.assertEqual(refined_soundex('Kreig', retain_vowels=True), 'K3904') |
||
439 | self.assertEqual(refined_soundex('Krug', retain_vowels=True), 'K3904') |
||
440 | self.assertEqual(refined_soundex('Kreg', retain_vowels=True), 'K3904') |
||
441 | self.assertEqual(refined_soundex('Krieg', retain_vowels=True), 'K3904') |
||
442 | self.assertEqual(refined_soundex('Krijg', retain_vowels=True), 'K3904') |
||
443 | |||
444 | # Apache Commons test cases |
||
445 | # http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/RefinedSoundexTest.java?view=markup |
||
446 | self.assertEqual(refined_soundex('testing', retain_vowels=True), |
||
447 | 'T6036084') |
||
448 | self.assertEqual(refined_soundex('TESTING', retain_vowels=True), |
||
449 | 'T6036084') |
||
450 | self.assertEqual(refined_soundex('The', retain_vowels=True), 'T60') |
||
451 | self.assertEqual(refined_soundex('quick', retain_vowels=True), 'Q503') |
||
452 | self.assertEqual(refined_soundex('brown', retain_vowels=True), 'B1908') |
||
453 | self.assertEqual(refined_soundex('fox', retain_vowels=True), 'F205') |
||
454 | self.assertEqual(refined_soundex('jumped', retain_vowels=True), |
||
455 | 'J408106') |
||
456 | self.assertEqual(refined_soundex('over', retain_vowels=True), 'O0209') |
||
457 | self.assertEqual(refined_soundex('the', retain_vowels=True), 'T60') |
||
458 | self.assertEqual(refined_soundex('lazy', retain_vowels=True), 'L7050') |
||
459 | self.assertEqual(refined_soundex('dogs', retain_vowels=True), 'D6043') |
||
460 | |||
461 | # length tests |
||
462 | self.assertEqual(refined_soundex('testing', maxlength=4, |
||
463 | zero_pad=True), 'T636') |
||
464 | self.assertEqual(refined_soundex('TESTING', maxlength=4, |
||
465 | zero_pad=True), 'T636') |
||
466 | self.assertEqual(refined_soundex('The', maxlength=4, zero_pad=True), |
||
467 | 'T600') |
||
468 | self.assertEqual(refined_soundex('quick', maxlength=4, zero_pad=True), |
||
469 | 'Q530') |
||
470 | self.assertEqual(refined_soundex('brown', maxlength=4, zero_pad=True), |
||
471 | 'B198') |
||
472 | self.assertEqual(refined_soundex('fox', maxlength=4, zero_pad=True), |
||
473 | 'F250') |
||
474 | self.assertEqual(refined_soundex('jumped', maxlength=4, zero_pad=True), |
||
475 | 'J481') |
||
476 | self.assertEqual(refined_soundex('over', maxlength=4, zero_pad=True), |
||
477 | 'O290') |
||
478 | self.assertEqual(refined_soundex('the', maxlength=4, zero_pad=True), |
||
479 | 'T600') |
||
480 | self.assertEqual(refined_soundex('lazy', maxlength=4, zero_pad=True), |
||
481 | 'L750') |
||
482 | self.assertEqual(refined_soundex('dogs', maxlength=4, zero_pad=True), |
||
483 | 'D643') |
||
484 | self.assertEqual(refined_soundex('The', maxlength=4), |
||
485 | 'T6') |
||
486 | self.assertEqual(refined_soundex('quick', maxlength=4), |
||
487 | 'Q53') |
||
488 | self.assertEqual(refined_soundex('brown', maxlength=4), |
||
489 | 'B198') |
||
490 | self.assertEqual(refined_soundex('fox', maxlength=4), |
||
491 | 'F25') |
||
492 | self.assertEqual(refined_soundex('jumped', maxlength=4), |
||
493 | 'J481') |
||
494 | self.assertEqual(refined_soundex('over', maxlength=4), |
||
495 | 'O29') |
||
496 | self.assertEqual(refined_soundex('the', maxlength=4), |
||
497 | 'T6') |
||
498 | self.assertEqual(refined_soundex('lazy', maxlength=4), |
||
499 | 'L75') |
||
500 | self.assertEqual(refined_soundex('dogs', maxlength=4), |
||
501 | 'D643') |
||
502 | |||
503 | def test_dm_soundex(self): |
||
504 | """Test abydos.phonetic.dm_soundex (Daitch-Mokotoff Soundex).""" |
||
505 | # D-M tests |
||
506 | self.assertEqual(dm_soundex(''), {'000000'}) |
||
507 | |||
508 | # http://www.avotaynu.com/soundex.htm |
||
509 | self.assertEqual(dm_soundex('Augsburg'), {'054795'}) |
||
510 | self.assertEqual(dm_soundex('Breuer'), {'791900'}) |
||
511 | self.assertEqual(dm_soundex('Halberstadt'), {'587943', '587433'}) |
||
512 | self.assertEqual(dm_soundex('Mannheim'), {'665600'}) |
||
513 | self.assertEqual(dm_soundex('Chernowitz'), {'496740', '596740'}) |
||
514 | self.assertEqual(dm_soundex('Cherkassy'), {'495400', '595400'}) |
||
515 | self.assertEqual(dm_soundex('Kleinman'), {'586660'}) |
||
516 | self.assertEqual(dm_soundex('Berlin'), {'798600'}) |
||
517 | |||
518 | self.assertEqual(dm_soundex('Ceniow'), {'467000', '567000'}) |
||
519 | self.assertEqual(dm_soundex('Tsenyuv'), {'467000'}) |
||
520 | self.assertEqual(dm_soundex('Holubica'), {'587400', '587500'}) |
||
521 | self.assertEqual(dm_soundex('Golubitsa'), {'587400'}) |
||
522 | self.assertEqual(dm_soundex('Przemysl'), {'746480', '794648'}) |
||
523 | self.assertEqual(dm_soundex('Pshemeshil'), {'746480'}) |
||
524 | self.assertEqual(dm_soundex('Rosochowaciec'), |
||
525 | {'944744', '945744', '944755', '944754', '944745', |
||
526 | '945745', '945754', '945755'}) |
||
527 | self.assertEqual(dm_soundex('Rosokhovatsets'), {'945744'}) |
||
528 | |||
529 | # https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex |
||
530 | self.assertEqual(dm_soundex('Peters'), {'739400', '734000'}) |
||
531 | self.assertEqual(dm_soundex('Peterson'), {'739460', '734600'}) |
||
532 | self.assertEqual(dm_soundex('Moskowitz'), {'645740'}) |
||
533 | self.assertEqual(dm_soundex('Moskovitz'), {'645740'}) |
||
534 | self.assertEqual(dm_soundex('Auerbach'), {'097500', '097400'}) |
||
535 | self.assertEqual(dm_soundex('Uhrbach'), {'097500', '097400'}) |
||
536 | self.assertEqual(dm_soundex('Jackson'), |
||
537 | {'154600', '454600', '145460', '445460'}) |
||
538 | self.assertEqual(dm_soundex('Jackson-Jackson'), |
||
539 | {'154654', '454654', '145465', '445465', |
||
540 | '154645', '454645', '145464', '445464', |
||
541 | '154644', '454644'}) |
||
542 | |||
543 | # http://www.jewishgen.org/infofiles/soundex.html |
||
544 | self.assertEqual(dm_soundex('OHRBACH'), {'097500', '097400'}) |
||
545 | self.assertEqual(dm_soundex('LIPSHITZ'), {'874400'}) |
||
546 | self.assertEqual(dm_soundex('LIPPSZYC'), {'874400', '874500'}) |
||
547 | self.assertEqual(dm_soundex('LEWINSKY'), {'876450'}) |
||
548 | self.assertEqual(dm_soundex('LEVINSKI'), {'876450'}) |
||
549 | self.assertEqual(dm_soundex('SZLAMAWICZ'), {'486740'}) |
||
550 | self.assertEqual(dm_soundex('SHLAMOVITZ'), {'486740'}) |
||
551 | |||
552 | # http://community.actian.com/wiki/OME_soundex_dm() |
||
553 | self.assertEqual(dm_soundex('Schwarzenegger'), |
||
554 | {'479465', '474659'}) |
||
555 | self.assertEqual(dm_soundex('Shwarzenegger'), |
||
556 | {'479465', '474659'}) |
||
557 | self.assertEqual(dm_soundex('Schwartsenegger'), {'479465'}) |
||
558 | |||
559 | # maxlength bounds tests |
||
560 | self.assertEqual(dm_soundex('Niall', maxlength=float('inf')), |
||
561 | {'68'+'0'*62}) |
||
562 | self.assertEqual(dm_soundex('Niall', maxlength=None), |
||
563 | {'68'+'0'*62}) |
||
564 | self.assertEqual(dm_soundex('Niall', maxlength=0), {'680000'}) |
||
565 | |||
566 | # zero_pad tests |
||
567 | self.assertEqual(dm_soundex('Niall', maxlength=float('inf'), |
||
568 | zero_pad=False), {'68'}) |
||
569 | self.assertEqual(dm_soundex('Niall', maxlength=None, zero_pad=False), |
||
570 | {'68'}) |
||
571 | self.assertEqual(dm_soundex('Niall', maxlength=0, zero_pad=False), |
||
572 | {'68'}) |
||
573 | self.assertEqual(dm_soundex('Niall', maxlength=0, zero_pad=True), |
||
574 | {'680000'}) |
||
575 | self.assertEqual(dm_soundex('', maxlength=6, zero_pad=False), |
||
576 | {'0'}) |
||
577 | self.assertEqual(dm_soundex('', maxlength=6, zero_pad=True), |
||
578 | {'000000'}) |
||
579 | |||
580 | |||
581 | class KoelnerPhonetikTestCases(unittest.TestCase): |
||
582 | """Test Koelner Phonetic functions. |
||
583 | |||
584 | test cases for abydos.phonetic.koelner_phonetik, |
||
585 | .koelner_phonetik_num_to_alpha, & .koelner_phonetik_alpha |
||
586 | """ |
||
587 | |||
588 | def test_koelner_phonetik(self): |
||
589 | """Test abydos.phonetic.koelner_phonetik.""" |
||
590 | self.assertEqual(koelner_phonetik(''), '') |
||
591 | |||
592 | # https://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik |
||
593 | self.assertEqual(koelner_phonetik('Müller-Lüdenscheidt'), '65752682') |
||
594 | self.assertEqual(koelner_phonetik('Wikipedia'), '3412') |
||
595 | self.assertEqual(koelner_phonetik('Breschnew'), '17863') |
||
596 | |||
597 | # http://search.cpan.org/~maros/Text-Phonetic/lib/Text/Phonetic/Koeln.pm |
||
598 | self.assertEqual(koelner_phonetik('Müller'), '657') |
||
599 | self.assertEqual(koelner_phonetik('schmidt'), '862') |
||
600 | self.assertEqual(koelner_phonetik('schneider'), '8627') |
||
601 | self.assertEqual(koelner_phonetik('fischer'), '387') |
||
602 | self.assertEqual(koelner_phonetik('weber'), '317') |
||
603 | self.assertEqual(koelner_phonetik('meyer'), '67') |
||
604 | self.assertEqual(koelner_phonetik('wagner'), '3467') |
||
605 | self.assertEqual(koelner_phonetik('schulz'), '858') |
||
606 | self.assertEqual(koelner_phonetik('becker'), '147') |
||
607 | self.assertEqual(koelner_phonetik('hoffmann'), '0366') |
||
608 | self.assertEqual(koelner_phonetik('schäfer'), '837') |
||
609 | self.assertEqual(koelner_phonetik('cater'), '427') |
||
610 | self.assertEqual(koelner_phonetik('axel'), '0485') |
||
611 | |||
612 | # etc. (for code coverage) |
||
613 | self.assertEqual(koelner_phonetik('Akxel'), '0485') |
||
614 | self.assertEqual(koelner_phonetik('Adz'), '08') |
||
615 | self.assertEqual(koelner_phonetik('Alpharades'), '053728') |
||
616 | self.assertEqual(koelner_phonetik('Cent'), '862') |
||
617 | self.assertEqual(koelner_phonetik('Acre'), '087') |
||
618 | self.assertEqual(koelner_phonetik('H'), '') |
||
619 | |||
620 | def test_koelner_phonetik_n2a(self): |
||
621 | """Test abydos.phonetic.koelner_phonetik_num_to_alpha.""" |
||
622 | self.assertEqual(koelner_phonetik_num_to_alpha('0123456789'), |
||
623 | 'APTFKLNRS') |
||
624 | |||
625 | def test_koelner_phonetik_alpha(self): |
||
626 | """Test abydos.phonetic.koelner_phonetik_alpha.""" |
||
627 | self.assertEqual(koelner_phonetik_alpha('Müller-Lüdenscheidt'), |
||
628 | 'NLRLTNST') |
||
629 | self.assertEqual(koelner_phonetik_alpha('Wikipedia'), 'FKPT') |
||
630 | self.assertEqual(koelner_phonetik_alpha('Breschnew'), 'PRSNF') |
||
631 | self.assertEqual(koelner_phonetik_alpha('Müller'), 'NLR') |
||
632 | self.assertEqual(koelner_phonetik_alpha('schmidt'), 'SNT') |
||
633 | self.assertEqual(koelner_phonetik_alpha('schneider'), 'SNTR') |
||
634 | self.assertEqual(koelner_phonetik_alpha('fischer'), 'FSR') |
||
635 | self.assertEqual(koelner_phonetik_alpha('weber'), 'FPR') |
||
636 | self.assertEqual(koelner_phonetik_alpha('meyer'), 'NR') |
||
637 | self.assertEqual(koelner_phonetik_alpha('wagner'), 'FKNR') |
||
638 | self.assertEqual(koelner_phonetik_alpha('schulz'), 'SLS') |
||
639 | self.assertEqual(koelner_phonetik_alpha('becker'), 'PKR') |
||
640 | self.assertEqual(koelner_phonetik_alpha('hoffmann'), 'AFNN') |
||
641 | self.assertEqual(koelner_phonetik_alpha('schäfer'), 'SFR') |
||
642 | self.assertEqual(koelner_phonetik_alpha('cater'), 'KTR') |
||
643 | self.assertEqual(koelner_phonetik_alpha('axel'), 'AKSL') |
||
644 | |||
645 | |||
646 | class NysiisTestCases(unittest.TestCase): |
||
647 | """Test NYSIIS functions. |
||
648 | |||
649 | test cases for abydos.phonetic.nysiis |
||
650 | """ |
||
651 | |||
652 | def test_nysiis(self): |
||
653 | """Test abydos.phonetic.nysiis.""" |
||
654 | self.assertEqual(nysiis(''), '') |
||
655 | |||
656 | # http://coryodaniel.com/index.php/2009/12/30/ruby-nysiis-implementation/ |
||
657 | self.assertEqual(nysiis('O\'Daniel'), 'ODANAL') |
||
658 | self.assertEqual(nysiis('O\'Donnel'), 'ODANAL') |
||
659 | self.assertEqual(nysiis('Cory'), 'CARY') |
||
660 | self.assertEqual(nysiis('Corey'), 'CARY') |
||
661 | self.assertEqual(nysiis('Kory'), 'CARY') |
||
662 | |||
663 | # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html |
||
664 | self.assertEqual(nysiis('Diggell'), 'DAGAL') |
||
665 | self.assertEqual(nysiis('Dougal'), 'DAGAL') |
||
666 | self.assertEqual(nysiis('Doughill'), 'DAGAL') |
||
667 | self.assertEqual(nysiis('Dougill'), 'DAGAL') |
||
668 | self.assertEqual(nysiis('Dowgill'), 'DAGAL') |
||
669 | self.assertEqual(nysiis('Dugall'), 'DAGAL') |
||
670 | self.assertEqual(nysiis('Dugall'), 'DAGAL') |
||
671 | self.assertEqual(nysiis('Glinde'), 'GLAND') |
||
672 | self.assertEqual(nysiis('Plumridge', maxlength=20), 'PLANRADG') |
||
673 | self.assertEqual(nysiis('Chinnick'), 'CANAC') |
||
674 | self.assertEqual(nysiis('Chinnock'), 'CANAC') |
||
675 | self.assertEqual(nysiis('Chinnock'), 'CANAC') |
||
676 | self.assertEqual(nysiis('Chomicki'), 'CANAC') |
||
677 | self.assertEqual(nysiis('Chomicz'), 'CANAC') |
||
678 | self.assertEqual(nysiis('Schimek'), 'SANAC') |
||
679 | self.assertEqual(nysiis('Shimuk'), 'SANAC') |
||
680 | self.assertEqual(nysiis('Simak'), 'SANAC') |
||
681 | self.assertEqual(nysiis('Simek'), 'SANAC') |
||
682 | self.assertEqual(nysiis('Simic'), 'SANAC') |
||
683 | self.assertEqual(nysiis('Sinnock'), 'SANAC') |
||
684 | self.assertEqual(nysiis('Sinnocke'), 'SANAC') |
||
685 | self.assertEqual(nysiis('Sunnex'), 'SANAX') |
||
686 | self.assertEqual(nysiis('Sunnucks'), 'SANAC') |
||
687 | self.assertEqual(nysiis('Sunock'), 'SANAC') |
||
688 | self.assertEqual(nysiis('Webberley', maxlength=20), 'WABARLY') |
||
689 | self.assertEqual(nysiis('Wibberley', maxlength=20), 'WABARLY') |
||
690 | |||
691 | # etc. (for code coverage) |
||
692 | self.assertEqual(nysiis('Alpharades'), 'ALFARA') |
||
693 | self.assertEqual(nysiis('Aschenputtel'), 'ASANPA') |
||
694 | self.assertEqual(nysiis('Beverly'), 'BAFARL') |
||
695 | self.assertEqual(nysiis('Hardt'), 'HARD') |
||
696 | self.assertEqual(nysiis('acknowledge'), 'ACNALA') |
||
697 | self.assertEqual(nysiis('MacNeill'), 'MCNAL') |
||
698 | self.assertEqual(nysiis('MacNeill'), nysiis('McNeill')) |
||
699 | self.assertEqual(nysiis('Knight'), 'NAGT') |
||
700 | self.assertEqual(nysiis('Knight'), nysiis('Night')) |
||
701 | self.assertEqual(nysiis('Pfarr'), 'FAR') |
||
702 | self.assertEqual(nysiis('Phair'), 'FAR') |
||
703 | self.assertEqual(nysiis('Phair'), nysiis('Pfarr')) |
||
704 | self.assertEqual(nysiis('Cherokee'), 'CARACY') |
||
705 | self.assertEqual(nysiis('Iraq'), 'IRAG') |
||
706 | |||
707 | # maxlength bounds tests |
||
708 | self.assertEqual(nysiis('Niall', maxlength=float('inf')), 'NAL') |
||
709 | self.assertEqual(nysiis('Niall', maxlength=None), 'NAL') |
||
710 | self.assertEqual(nysiis('Niall', maxlength=0), 'NAL') |
||
711 | |||
712 | def test_modified_nysiis(self): |
||
713 | """Test abydos.phonetic.nysiis (modified version).""" |
||
714 | self.assertEqual(nysiis('', maxlength=float('inf'), modified=True), '') |
||
715 | |||
716 | # https://naldc.nal.usda.gov/download/27833/PDF |
||
717 | # Some of these were... wrong... and have been corrected |
||
718 | self.assertEqual(nysiis('Daves', maxlength=8, modified=True), 'DAV') |
||
719 | self.assertEqual(nysiis('Davies', maxlength=8, modified=True), 'DAVY') |
||
720 | self.assertEqual(nysiis('Devies', maxlength=8, modified=True), 'DAFY') |
||
721 | self.assertEqual(nysiis('Divish', maxlength=8, modified=True), 'DAVAS') |
||
722 | self.assertEqual(nysiis('Dove', maxlength=8, modified=True), 'DAV') |
||
723 | self.assertEqual(nysiis('Devese', maxlength=8, modified=True), 'DAFAS') |
||
724 | self.assertEqual(nysiis('Devies', maxlength=8, modified=True), 'DAFY') |
||
725 | self.assertEqual(nysiis('Devos', maxlength=8, modified=True), 'DAF') |
||
726 | |||
727 | self.assertEqual(nysiis('Schmit', maxlength=8, modified=True), 'SNAT') |
||
728 | self.assertEqual(nysiis('Schmitt', maxlength=8, modified=True), 'SNAT') |
||
729 | self.assertEqual(nysiis('Schmitz', maxlength=8, modified=True), 'SNAT') |
||
730 | self.assertEqual(nysiis('Schmoutz', maxlength=8, modified=True), |
||
731 | 'SNAT') |
||
732 | self.assertEqual(nysiis('Schnitt', maxlength=8, modified=True), 'SNAT') |
||
733 | self.assertEqual(nysiis('Smit', maxlength=8, modified=True), 'SNAT') |
||
734 | self.assertEqual(nysiis('Smite', maxlength=8, modified=True), 'SNAT') |
||
735 | self.assertEqual(nysiis('Smits', maxlength=8, modified=True), 'SNAT') |
||
736 | self.assertEqual(nysiis('Smoot', maxlength=8, modified=True), 'SNAT') |
||
737 | self.assertEqual(nysiis('Smuts', maxlength=8, modified=True), 'SNAT') |
||
738 | self.assertEqual(nysiis('Sneath', maxlength=8, modified=True), 'SNAT') |
||
739 | self.assertEqual(nysiis('Smyth', maxlength=8, modified=True), 'SNAT') |
||
740 | self.assertEqual(nysiis('Smithy', maxlength=8, modified=True), 'SNATY') |
||
741 | self.assertEqual(nysiis('Smithey', maxlength=8, modified=True), |
||
742 | 'SNATY') |
||
743 | |||
744 | # http://www.dropby.com/NYSIISTextStrings.html |
||
745 | # Some of these have been altered since the above uses a different set |
||
746 | # of modifications. |
||
747 | self.assertEqual(nysiis('Edwards', maxlength=8, modified=True), |
||
748 | 'EDWAD') |
||
749 | self.assertEqual(nysiis('Perez', maxlength=8, modified=True), 'PAR') |
||
750 | self.assertEqual(nysiis('Macintosh', maxlength=8, modified=True), |
||
751 | 'MCANTAS') |
||
752 | self.assertEqual(nysiis('Phillipson', maxlength=8, modified=True), |
||
753 | 'FALAPSAN') |
||
754 | self.assertEqual(nysiis('Haddix', maxlength=8, modified=True), 'HADAC') |
||
755 | self.assertEqual(nysiis('Essex', maxlength=8, modified=True), 'ESAC') |
||
756 | self.assertEqual(nysiis('Moye', maxlength=8, modified=True), 'MY') |
||
757 | self.assertEqual(nysiis('McKee', maxlength=8, modified=True), 'MCY') |
||
758 | self.assertEqual(nysiis('Mackie', maxlength=8, modified=True), 'MCY') |
||
759 | self.assertEqual(nysiis('Heitschmidt', maxlength=8, modified=True), |
||
760 | 'HATSNAD') |
||
761 | self.assertEqual(nysiis('Bart', maxlength=8, modified=True), 'BAD') |
||
762 | self.assertEqual(nysiis('Hurd', maxlength=8, modified=True), 'HAD') |
||
763 | self.assertEqual(nysiis('Hunt', maxlength=8, modified=True), 'HAN') |
||
764 | self.assertEqual(nysiis('Westerlund', maxlength=8, modified=True), |
||
765 | 'WASTARLA') |
||
766 | self.assertEqual(nysiis('Evers', maxlength=8, modified=True), 'EVAR') |
||
767 | self.assertEqual(nysiis('Devito', maxlength=8, modified=True), 'DAFAT') |
||
768 | self.assertEqual(nysiis('Rawson', maxlength=8, modified=True), 'RASAN') |
||
769 | self.assertEqual(nysiis('Shoulders', maxlength=8, modified=True), |
||
770 | 'SALDAR') |
||
771 | self.assertEqual(nysiis('Leighton', maxlength=8, modified=True), |
||
772 | 'LATAN') |
||
773 | self.assertEqual(nysiis('Wooldridge', maxlength=8, modified=True), |
||
774 | 'WALDRAG') |
||
775 | self.assertEqual(nysiis('Oliphant', maxlength=8, modified=True), |
||
776 | 'OLAFAN') |
||
777 | self.assertEqual(nysiis('Hatchett', maxlength=8, modified=True), |
||
778 | 'HATCAT') |
||
779 | self.assertEqual(nysiis('McKnight', maxlength=8, modified=True), |
||
780 | 'MCNAT') |
||
781 | self.assertEqual(nysiis('Rickert', maxlength=8, modified=True), |
||
782 | 'RACAD') |
||
783 | self.assertEqual(nysiis('Bowman', maxlength=8, modified=True), 'BANAN') |
||
784 | self.assertEqual(nysiis('Vasquez', maxlength=8, modified=True), 'VASG') |
||
785 | self.assertEqual(nysiis('Bashaw', maxlength=8, modified=True), 'BAS') |
||
786 | self.assertEqual(nysiis('Schoenhoeft', maxlength=8, modified=True), |
||
787 | 'SANAFT') |
||
788 | self.assertEqual(nysiis('Heywood', maxlength=8, modified=True), 'HAD') |
||
789 | self.assertEqual(nysiis('Hayman', maxlength=8, modified=True), 'HANAN') |
||
790 | self.assertEqual(nysiis('Seawright', maxlength=8, modified=True), |
||
791 | 'SARAT') |
||
792 | self.assertEqual(nysiis('Kratzer', maxlength=8, modified=True), |
||
793 | 'CRATSAR') |
||
794 | self.assertEqual(nysiis('Canaday', maxlength=8, modified=True), |
||
795 | 'CANADY') |
||
796 | self.assertEqual(nysiis('Crepeau', maxlength=8, modified=True), 'CRAP') |
||
797 | |||
798 | # Additional tests from @Yomguithereal's talisman |
||
799 | # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/nysiis.js |
||
800 | self.assertEqual(nysiis('Andrew', maxlength=8, modified=True), 'ANDR') |
||
801 | self.assertEqual(nysiis('Robertson', maxlength=8, modified=True), |
||
802 | 'RABARTSA') |
||
803 | self.assertEqual(nysiis('Nolan', maxlength=8, modified=True), 'NALAN') |
||
804 | self.assertEqual(nysiis('Louis XVI', maxlength=8, modified=True), |
||
805 | 'LASXV') |
||
806 | self.assertEqual(nysiis('Case', maxlength=8, modified=True), 'CAS') |
||
807 | self.assertEqual(nysiis('Mclaughlin', maxlength=8, modified=True), |
||
808 | 'MCLAGLAN') |
||
809 | self.assertEqual(nysiis('Awale', maxlength=8, modified=True), 'AL') |
||
810 | self.assertEqual(nysiis('Aegir', maxlength=8, modified=True), 'AGAR') |
||
811 | self.assertEqual(nysiis('Lundgren', maxlength=8, modified=True), |
||
812 | 'LANGRAN') |
||
813 | self.assertEqual(nysiis('Philbert', maxlength=8, modified=True), |
||
814 | 'FALBAD') |
||
815 | self.assertEqual(nysiis('Harry', maxlength=8, modified=True), 'HARY') |
||
816 | self.assertEqual(nysiis('Mackenzie', maxlength=8, modified=True), |
||
817 | 'MCANSY') |
||
818 | |||
819 | # maxlength bounds tests |
||
820 | self.assertEqual(nysiis('Niall', maxlength=float('inf'), |
||
821 | modified=True), 'NAL') |
||
822 | self.assertEqual(nysiis('Niall', maxlength=None, modified=True), 'NAL') |
||
823 | self.assertEqual(nysiis('Niall', maxlength=0, modified=True), 'NAL') |
||
824 | |||
825 | # coverage |
||
826 | self.assertEqual(nysiis('Sam Jr.', modified=True), 'ERROR') |
||
827 | self.assertEqual(nysiis('John Sr.', modified=True), 'ERROR') |
||
828 | self.assertEqual(nysiis('Wright', modified=True), 'RAT') |
||
829 | self.assertEqual(nysiis('Rhodes', modified=True), 'RAD') |
||
830 | self.assertEqual(nysiis('Dgagoda', modified=True), 'GAGAD') |
||
831 | self.assertEqual(nysiis('Bosch', modified=True), 'BAS') |
||
832 | self.assertEqual(nysiis('Schrader', modified=True), 'SRADAR') |
||
833 | |||
834 | |||
835 | class MraTestCases(unittest.TestCase): |
||
836 | """Test MRA functions. |
||
837 | |||
838 | test cases for abydos.phonetic.mra |
||
839 | """ |
||
840 | |||
841 | def test_mra(self): |
||
842 | """Test abydos.phonetic.mra.""" |
||
843 | self.assertEqual(mra(''), '') |
||
844 | |||
845 | # https://en.wikipedia.org/wiki/Match_rating_approach |
||
846 | self.assertEqual(mra('Byrne'), 'BYRN') |
||
847 | self.assertEqual(mra('Boern'), 'BRN') |
||
848 | self.assertEqual(mra('Smith'), 'SMTH') |
||
849 | self.assertEqual(mra('Smyth'), 'SMYTH') |
||
850 | self.assertEqual(mra('Catherine'), 'CTHRN') |
||
851 | self.assertEqual(mra('Kathryn'), 'KTHRYN') |
||
852 | |||
853 | |||
854 | class MetaphoneTestCases(unittest.TestCase): |
||
855 | """Test Metaphone functions. |
||
856 | |||
857 | test cases for abydos.phonetic.metaphone |
||
858 | """ |
||
859 | |||
860 | def test_metaphone(self): |
||
861 | """Test abydos.phonetic.metaphone.""" |
||
862 | self.assertEqual(metaphone(''), '') |
||
863 | self.assertEqual(metaphone('...'), '') |
||
864 | |||
865 | # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html |
||
866 | self.assertEqual(metaphone('Fishpool', 4), 'FXPL') |
||
867 | self.assertEqual(metaphone('Fishpoole', 4), 'FXPL') |
||
868 | self.assertEqual(metaphone('Gellately', 4), 'JLTL') |
||
869 | self.assertEqual(metaphone('Gelletly', 4), 'JLTL') |
||
870 | self.assertEqual(metaphone('Lowers', 4), 'LWRS') |
||
871 | self.assertEqual(metaphone('Lowerson', 4), 'LWRS') |
||
872 | self.assertEqual(metaphone('Mallabar', 4), 'MLBR') |
||
873 | self.assertEqual(metaphone('Melbert', 4), 'MLBR') |
||
874 | self.assertEqual(metaphone('Melbourn', 4), 'MLBR') |
||
875 | self.assertEqual(metaphone('Melbourne', 4), 'MLBR') |
||
876 | self.assertEqual(metaphone('Melburg', 4), 'MLBR') |
||
877 | self.assertEqual(metaphone('Melbury', 4), 'MLBR') |
||
878 | self.assertEqual(metaphone('Milberry', 4), 'MLBR') |
||
879 | self.assertEqual(metaphone('Milborn', 4), 'MLBR') |
||
880 | self.assertEqual(metaphone('Milbourn', 4), 'MLBR') |
||
881 | self.assertEqual(metaphone('Milbourne', 4), 'MLBR') |
||
882 | self.assertEqual(metaphone('Milburn', 4), 'MLBR') |
||
883 | self.assertEqual(metaphone('Milburne', 4), 'MLBR') |
||
884 | self.assertEqual(metaphone('Millberg', 4), 'MLBR') |
||
885 | self.assertEqual(metaphone('Mulberry', 4), 'MLBR') |
||
886 | self.assertEqual(metaphone('Mulbery', 4), 'MLBR') |
||
887 | self.assertEqual(metaphone('Mulbry', 4), 'MLBR') |
||
888 | self.assertEqual(metaphone('Saipy', 4), 'SP') |
||
889 | self.assertEqual(metaphone('Sapey', 4), 'SP') |
||
890 | self.assertEqual(metaphone('Sapp', 4), 'SP') |
||
891 | self.assertEqual(metaphone('Sappy', 4), 'SP') |
||
892 | self.assertEqual(metaphone('Sepey', 4), 'SP') |
||
893 | self.assertEqual(metaphone('Seppey', 4), 'SP') |
||
894 | self.assertEqual(metaphone('Sopp', 4), 'SP') |
||
895 | self.assertEqual(metaphone('Zoppie', 4), 'SP') |
||
896 | self.assertEqual(metaphone('Zoppo', 4), 'SP') |
||
897 | self.assertEqual(metaphone('Zupa', 4), 'SP') |
||
898 | self.assertEqual(metaphone('Zupo', 4), 'SP') |
||
899 | self.assertEqual(metaphone('Zuppa', 4), 'SP') |
||
900 | |||
901 | # assorted tests to complete code coverage |
||
902 | self.assertEqual(metaphone('Xavier'), 'SFR') |
||
903 | self.assertEqual(metaphone('Acacia'), 'AKX') |
||
904 | self.assertEqual(metaphone('Schuler'), 'SKLR') |
||
905 | self.assertEqual(metaphone('Sign'), 'SN') |
||
906 | self.assertEqual(metaphone('Signed'), 'SNT') |
||
907 | self.assertEqual(metaphone('Horatio'), 'HRX') |
||
908 | self.assertEqual(metaphone('Ignatio'), 'IKNX') |
||
909 | self.assertEqual(metaphone('Lucretia'), 'LKRX') |
||
910 | |||
911 | # assorted tests to complete branch coverage |
||
912 | self.assertEqual(metaphone('Lamb'), 'LM') |
||
913 | self.assertEqual(metaphone('science'), 'SNS') |
||
914 | |||
915 | # maxlength bounds tests |
||
916 | self.assertEqual(metaphone('Niall', maxlength=float('inf')), 'NL') |
||
917 | self.assertEqual(metaphone('Niall', maxlength=None), 'NL') |
||
918 | self.assertEqual(metaphone('Niall', maxlength=0), 'NL') |
||
919 | |||
920 | |||
921 | class DoubleMetaphoneTestCases(unittest.TestCase): |
||
922 | """Test Double Metaphone functions. |
||
923 | |||
924 | test cases for abydos.phonetic.double_metaphone |
||
925 | |||
926 | These test cases are copied from two sources: |
||
927 | https://github.com/oubiwann/metaphone/blob/master/metaphone/tests/test_metaphone.py |
||
928 | and |
||
929 | http://swoodbridge.com/DoubleMetaPhone/surnames.txt |
||
930 | |||
931 | Most test cases other than those in test_surnames and test_surnames4 come |
||
932 | from the former and are under the following license: |
||
933 | |||
934 | Copyright (c) 2007 Andrew Collins, Chris Leong |
||
935 | Copyright (c) 2009 Matthew Somerville |
||
936 | Copyright (c) 2010 Maximillian Dornseif, Richard Barran |
||
937 | Copyright (c) 2012 Duncan McGreggor |
||
938 | All rights reserved. |
||
939 | |||
940 | * Redistribution and use in source and binary forms, with or without |
||
941 | modification, are permitted provided that the following conditions |
||
942 | are met: |
||
943 | |||
944 | * Redistributions of source code must retain the above copyright |
||
945 | notice, this list of conditions and the following disclaimer. |
||
946 | |||
947 | * Redistributions in binary form must reproduce the above copyright |
||
948 | notice, this list of conditions and the following disclaimer in |
||
949 | the documentation and/or other materials provided with the |
||
950 | distribution. |
||
951 | |||
952 | Neither the name "Metaphone" nor the names of its contributors may be |
||
953 | used to endorse or promote products derived from this software without |
||
954 | specific prior written permission. |
||
955 | |||
956 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||
957 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||
958 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
||
959 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
||
960 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||
961 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
||
962 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||
963 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||
964 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
965 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
||
966 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
967 | |||
968 | test_surname and test_surname4 come from a set of tests for a PHP port |
||
969 | of Double Metaphone that is Copyright 2001, Stephen Woodbridge and |
||
970 | identified as 'freely distributable' |
||
971 | """ |
||
972 | |||
973 | def test_double_metaphone(self): |
||
974 | """Test abydos.phonetic.double_metaphone.""" |
||
975 | # base case |
||
976 | self.assertEqual(double_metaphone(''), ('', '')) |
||
977 | |||
978 | # single result |
||
979 | self.assertEqual(double_metaphone('aubrey'), ('APR', '')) |
||
980 | |||
981 | # double result |
||
982 | self.assertEqual(double_metaphone('richard'), ('RXRT', 'RKRT')) |
||
983 | |||
984 | # general word list |
||
985 | self.assertEqual(double_metaphone('Jose'), ('HS', '')) |
||
986 | self.assertEqual(double_metaphone('cambrillo'), ('KMPRL', 'KMPR')) |
||
987 | self.assertEqual(double_metaphone('otto'), ('AT', '')) |
||
988 | self.assertEqual(double_metaphone('aubrey'), ('APR', '')) |
||
989 | self.assertEqual(double_metaphone('maurice'), ('MRS', '')) |
||
990 | self.assertEqual(double_metaphone('auto'), ('AT', '')) |
||
991 | self.assertEqual(double_metaphone('maisey'), ('MS', '')) |
||
992 | self.assertEqual(double_metaphone('catherine'), ('K0RN', 'KTRN')) |
||
993 | self.assertEqual(double_metaphone('geoff'), ('JF', 'KF')) |
||
994 | self.assertEqual(double_metaphone('Chile'), ('XL', '')) |
||
995 | self.assertEqual(double_metaphone('katherine'), ('K0RN', 'KTRN')) |
||
996 | self.assertEqual(double_metaphone('steven'), ('STFN', '')) |
||
997 | self.assertEqual(double_metaphone('zhang'), ('JNK', '')) |
||
998 | self.assertEqual(double_metaphone('bob'), ('PP', '')) |
||
999 | self.assertEqual(double_metaphone('ray'), ('R', '')) |
||
1000 | self.assertEqual(double_metaphone('Tux'), ('TKS', '')) |
||
1001 | self.assertEqual(double_metaphone('bryan'), ('PRN', '')) |
||
1002 | self.assertEqual(double_metaphone('bryce'), ('PRS', '')) |
||
1003 | self.assertEqual(double_metaphone('Rapelje'), ('RPL', '')) |
||
1004 | self.assertEqual(double_metaphone('richard'), ('RXRT', 'RKRT')) |
||
1005 | self.assertEqual(double_metaphone('solilijs'), ('SLLS', '')) |
||
1006 | self.assertEqual(double_metaphone('Dallas'), ('TLS', '')) |
||
1007 | self.assertEqual(double_metaphone('Schwein'), ('XN', 'XFN')) |
||
1008 | self.assertEqual(double_metaphone('dave'), ('TF', '')) |
||
1009 | self.assertEqual(double_metaphone('eric'), ('ARK', '')) |
||
1010 | self.assertEqual(double_metaphone('Parachute'), ('PRKT', '')) |
||
1011 | self.assertEqual(double_metaphone('brian'), ('PRN', '')) |
||
1012 | self.assertEqual(double_metaphone('randy'), ('RNT', '')) |
||
1013 | self.assertEqual(double_metaphone('Through'), ('0R', 'TR')) |
||
1014 | self.assertEqual(double_metaphone('Nowhere'), ('NR', '')) |
||
1015 | self.assertEqual(double_metaphone('heidi'), ('HT', '')) |
||
1016 | self.assertEqual(double_metaphone('Arnow'), ('ARN', 'ARNF')) |
||
1017 | self.assertEqual(double_metaphone('Thumbail'), ('0MPL', 'TMPL')) |
||
1018 | |||
1019 | # homophones |
||
1020 | self.assertEqual(double_metaphone('tolled'), double_metaphone('told')) |
||
1021 | self.assertEqual(double_metaphone('katherine'), |
||
1022 | double_metaphone('catherine')) |
||
1023 | self.assertEqual(double_metaphone('brian'), double_metaphone('bryan')) |
||
1024 | |||
1025 | # similar names |
||
1026 | self.assertEqual(double_metaphone('Bartoš'), ('PRT', '')) |
||
1027 | self.assertEqual(double_metaphone('Bartosz'), ('PRTS', 'PRTX')) |
||
1028 | self.assertEqual(double_metaphone('Bartosch'), ('PRTX', '')) |
||
1029 | self.assertEqual(double_metaphone('Bartos'), ('PRTS', '')) |
||
1030 | self.assertEqual(list(set(double_metaphone('Jablonski')) |
||
1031 | .intersection(double_metaphone('Yablonsky'))), |
||
1032 | ['APLNSK']) |
||
1033 | self.assertEqual(list(set(double_metaphone('Smith')) |
||
1034 | .intersection(double_metaphone('Schmidt'))), |
||
1035 | ['XMT']) |
||
1036 | |||
1037 | # non-English Unicode |
||
1038 | self.assertEqual(double_metaphone('andestādītu'), ('ANTSTTT', '')) |
||
1039 | |||
1040 | # c-cedilla |
||
1041 | self.assertEqual(double_metaphone('français'), ('FRNS', 'FRNSS')) |
||
1042 | self.assertEqual(double_metaphone('garçon'), ('KRSN', '')) |
||
1043 | self.assertEqual(double_metaphone('leçon'), ('LSN', '')) |
||
1044 | |||
1045 | # German words |
||
1046 | self.assertEqual(double_metaphone('ach'), ('AK', '')) |
||
1047 | self.assertEqual(double_metaphone('bacher'), ('PKR', '')) |
||
1048 | self.assertEqual(double_metaphone('macher'), ('MKR', '')) |
||
1049 | |||
1050 | # Italian words |
||
1051 | self.assertEqual(double_metaphone('bacci'), ('PX', '')) |
||
1052 | self.assertEqual(double_metaphone('bertucci'), ('PRTX', '')) |
||
1053 | self.assertEqual(double_metaphone('bellocchio'), ('PLX', '')) |
||
1054 | self.assertEqual(double_metaphone('bacchus'), ('PKS', '')) |
||
1055 | self.assertEqual(double_metaphone('focaccia'), ('FKX', '')) |
||
1056 | self.assertEqual(double_metaphone('chianti'), ('KNT', '')) |
||
1057 | self.assertEqual(double_metaphone('tagliaro'), ('TKLR', 'TLR')) |
||
1058 | self.assertEqual(double_metaphone('biaggi'), ('PJ', 'PK')) |
||
1059 | |||
1060 | # Spanish words |
||
1061 | self.assertEqual(double_metaphone('bajador'), ('PJTR', 'PHTR')) |
||
1062 | self.assertEqual(double_metaphone('cabrillo'), ('KPRL', 'KPR')) |
||
1063 | self.assertEqual(double_metaphone('gallegos'), ('KLKS', 'KKS')) |
||
1064 | self.assertEqual(double_metaphone('San Jacinto'), ('SNHSNT', '')) |
||
1065 | |||
1066 | # French words |
||
1067 | self.assertEqual(double_metaphone('rogier'), ('RJ', 'RJR')) |
||
1068 | self.assertEqual(double_metaphone('breaux'), ('PR', '')) |
||
1069 | |||
1070 | # Slavic words |
||
1071 | self.assertEqual(double_metaphone('Wewski'), ('ASK', 'FFSK')) |
||
1072 | |||
1073 | # Chinese words |
||
1074 | self.assertEqual(double_metaphone('zhao'), ('J', '')) |
||
1075 | |||
1076 | # Dutch-origin words |
||
1077 | self.assertEqual(double_metaphone('school'), ('SKL', '')) |
||
1078 | self.assertEqual(double_metaphone('schooner'), ('SKNR', '')) |
||
1079 | self.assertEqual(double_metaphone('schermerhorn'), |
||
1080 | ('XRMRRN', 'SKRMRRN')) |
||
1081 | self.assertEqual(double_metaphone('schenker'), ('XNKR', 'SKNKR')) |
||
1082 | |||
1083 | # <ch> words |
||
1084 | self.assertEqual(double_metaphone('Charac'), ('KRK', '')) |
||
1085 | self.assertEqual(double_metaphone('Charis'), ('KRS', '')) |
||
1086 | self.assertEqual(double_metaphone('chord'), ('KRT', '')) |
||
1087 | self.assertEqual(double_metaphone('Chym'), ('KM', '')) |
||
1088 | self.assertEqual(double_metaphone('Chia'), ('K', '')) |
||
1089 | self.assertEqual(double_metaphone('chem'), ('KM', '')) |
||
1090 | self.assertEqual(double_metaphone('chore'), ('XR', '')) |
||
1091 | self.assertEqual(double_metaphone('orchestra'), ('ARKSTR', '')) |
||
1092 | self.assertEqual(double_metaphone('architect'), ('ARKTKT', '')) |
||
1093 | self.assertEqual(double_metaphone('orchid'), ('ARKT', '')) |
||
1094 | |||
1095 | # <cc> words |
||
1096 | self.assertEqual(double_metaphone('accident'), ('AKSTNT', '')) |
||
1097 | self.assertEqual(double_metaphone('accede'), ('AKST', '')) |
||
1098 | self.assertEqual(double_metaphone('succeed'), ('SKST', '')) |
||
1099 | |||
1100 | # <mc> words |
||
1101 | self.assertEqual(double_metaphone('mac caffrey'), ('MKFR', '')) |
||
1102 | self.assertEqual(double_metaphone('mac gregor'), ('MKRKR', '')) |
||
1103 | self.assertEqual(double_metaphone('mc crae'), ('MKR', '')) |
||
1104 | self.assertEqual(double_metaphone('mcclain'), ('MKLN', '')) |
||
1105 | |||
1106 | # <gh> words |
||
1107 | self.assertEqual(double_metaphone('laugh'), ('LF', '')) |
||
1108 | self.assertEqual(double_metaphone('cough'), ('KF', '')) |
||
1109 | self.assertEqual(double_metaphone('rough'), ('RF', '')) |
||
1110 | |||
1111 | # <g__> words |
||
1112 | self.assertEqual(double_metaphone('gya'), ('K', 'J')) |
||
1113 | self.assertEqual(double_metaphone('ges'), ('KS', 'JS')) |
||
1114 | self.assertEqual(double_metaphone('gep'), ('KP', 'JP')) |
||
1115 | self.assertEqual(double_metaphone('geb'), ('KP', 'JP')) |
||
1116 | self.assertEqual(double_metaphone('gel'), ('KL', 'JL')) |
||
1117 | self.assertEqual(double_metaphone('gey'), ('K', 'J')) |
||
1118 | self.assertEqual(double_metaphone('gib'), ('KP', 'JP')) |
||
1119 | self.assertEqual(double_metaphone('gil'), ('KL', 'JL')) |
||
1120 | self.assertEqual(double_metaphone('gin'), ('KN', 'JN')) |
||
1121 | self.assertEqual(double_metaphone('gie'), ('K', 'J')) |
||
1122 | self.assertEqual(double_metaphone('gei'), ('K', 'J')) |
||
1123 | self.assertEqual(double_metaphone('ger'), ('KR', 'JR')) |
||
1124 | self.assertEqual(double_metaphone('danger'), ('TNJR', 'TNKR')) |
||
1125 | self.assertEqual(double_metaphone('manager'), ('MNKR', 'MNJR')) |
||
1126 | self.assertEqual(double_metaphone('dowager'), ('TKR', 'TJR')) |
||
1127 | |||
1128 | # <pb> words |
||
1129 | self.assertEqual(double_metaphone('Campbell'), ('KMPL', '')) |
||
1130 | self.assertEqual(double_metaphone('raspberry'), ('RSPR', '')) |
||
1131 | |||
1132 | # <th> words |
||
1133 | self.assertEqual(double_metaphone('Thomas'), ('TMS', '')) |
||
1134 | self.assertEqual(double_metaphone('Thames'), ('TMS', '')) |
||
1135 | |||
1136 | # etc. (for code coverage) |
||
1137 | self.assertEqual(double_metaphone('Xavier'), ('SF', 'SFR')) |
||
1138 | self.assertEqual(double_metaphone('Michael'), ('MKL', 'MXL')) |
||
1139 | self.assertEqual(double_metaphone('Ignacio'), ('AKNS', 'ANX')) |
||
1140 | self.assertEqual(double_metaphone('Ajjam'), ('AJM', '')) |
||
1141 | self.assertEqual(double_metaphone('Akkad'), ('AKT', '')) |
||
1142 | self.assertEqual(double_metaphone('Año'), ('AN', '')) |
||
1143 | self.assertEqual(double_metaphone('Año'), double_metaphone('Anno')) |
||
1144 | self.assertEqual(double_metaphone('Caucasian'), ('KKSN', 'KKXN')) |
||
1145 | self.assertEqual(double_metaphone('Kaukasian'), ('KKSN', '')) |
||
1146 | self.assertEqual(double_metaphone('Zaqqum'), ('SKM', '')) |
||
1147 | self.assertEqual(double_metaphone('stevven'), ('STFN', '')) |
||
1148 | self.assertEqual(double_metaphone('Tuxx'), ('TKS', '')) |
||
1149 | self.assertEqual(double_metaphone('Ghiradelli'), ('JRTL', '')) |
||
1150 | self.assertEqual(double_metaphone('ghoul'), ('KL', '')) |
||
1151 | self.assertEqual(double_metaphone('hej'), ('HJ', 'H')) |
||
1152 | |||
1153 | # maxlength bounds tests |
||
1154 | self.assertEqual(double_metaphone('Niall', maxlength=float('inf')), |
||
1155 | ('NL', '')) |
||
1156 | self.assertEqual(double_metaphone('Niall', maxlength=None), ('NL', '')) |
||
1157 | self.assertEqual(double_metaphone('Niall', maxlength=0), ('NL', '')) |
||
1158 | |||
1159 | def test_double_metaphone_surnames(self): |
||
1160 | """Test abydos.phonetic.double_metaphone (surname data).""" |
||
1161 | self.assertEqual(double_metaphone(''), ('', '')) |
||
1162 | self.assertEqual(double_metaphone('ALLERTON'), ('ALRTN', '')) |
||
1163 | self.assertEqual(double_metaphone('Acton'), ('AKTN', '')) |
||
1164 | self.assertEqual(double_metaphone('Adams'), ('ATMS', '')) |
||
1165 | self.assertEqual(double_metaphone('Aggar'), ('AKR', '')) |
||
1166 | self.assertEqual(double_metaphone('Ahl'), ('AL', '')) |
||
1167 | self.assertEqual(double_metaphone('Aiken'), ('AKN', '')) |
||
1168 | self.assertEqual(double_metaphone('Alan'), ('ALN', '')) |
||
1169 | self.assertEqual(double_metaphone('Alcock'), ('ALKK', '')) |
||
1170 | self.assertEqual(double_metaphone('Alden'), ('ALTN', '')) |
||
1171 | self.assertEqual(double_metaphone('Aldham'), ('ALTM', '')) |
||
1172 | self.assertEqual(double_metaphone('Allen'), ('ALN', '')) |
||
1173 | self.assertEqual(double_metaphone('Allerton'), ('ALRTN', '')) |
||
1174 | self.assertEqual(double_metaphone('Alsop'), ('ALSP', '')) |
||
1175 | self.assertEqual(double_metaphone('Alwein'), ('ALN', '')) |
||
1176 | self.assertEqual(double_metaphone('Ambler'), ('AMPLR', '')) |
||
1177 | self.assertEqual(double_metaphone('Andevill'), ('ANTFL', '')) |
||
1178 | self.assertEqual(double_metaphone('Andrews'), ('ANTRS', '')) |
||
1179 | self.assertEqual(double_metaphone('Andreyco'), ('ANTRK', '')) |
||
1180 | self.assertEqual(double_metaphone('Andriesse'), ('ANTRS', '')) |
||
1181 | self.assertEqual(double_metaphone('Angier'), ('ANJ', 'ANJR')) |
||
1182 | self.assertEqual(double_metaphone('Annabel'), ('ANPL', '')) |
||
1183 | self.assertEqual(double_metaphone('Anne'), ('AN', '')) |
||
1184 | self.assertEqual(double_metaphone('Anstye'), ('ANST', '')) |
||
1185 | self.assertEqual(double_metaphone('Appling'), ('APLNK', '')) |
||
1186 | self.assertEqual(double_metaphone('Apuke'), ('APK', '')) |
||
1187 | self.assertEqual(double_metaphone('Arnold'), ('ARNLT', '')) |
||
1188 | self.assertEqual(double_metaphone('Ashby'), ('AXP', '')) |
||
1189 | self.assertEqual(double_metaphone('Astwood'), ('ASTT', '')) |
||
1190 | self.assertEqual(double_metaphone('Atkinson'), ('ATKNSN', '')) |
||
1191 | self.assertEqual(double_metaphone('Audley'), ('ATL', '')) |
||
1192 | self.assertEqual(double_metaphone('Austin'), ('ASTN', '')) |
||
1193 | self.assertEqual(double_metaphone('Avenal'), ('AFNL', '')) |
||
1194 | self.assertEqual(double_metaphone('Ayer'), ('AR', '')) |
||
1195 | self.assertEqual(double_metaphone('Ayot'), ('AT', '')) |
||
1196 | self.assertEqual(double_metaphone('Babbitt'), ('PPT', '')) |
||
1197 | self.assertEqual(double_metaphone('Bachelor'), ('PXLR', 'PKLR')) |
||
1198 | self.assertEqual(double_metaphone('Bachelour'), ('PXLR', 'PKLR')) |
||
1199 | self.assertEqual(double_metaphone('Bailey'), ('PL', '')) |
||
1200 | self.assertEqual(double_metaphone('Baivel'), ('PFL', '')) |
||
1201 | self.assertEqual(double_metaphone('Baker'), ('PKR', '')) |
||
1202 | self.assertEqual(double_metaphone('Baldwin'), ('PLTN', '')) |
||
1203 | self.assertEqual(double_metaphone('Balsley'), ('PLSL', '')) |
||
1204 | self.assertEqual(double_metaphone('Barber'), ('PRPR', '')) |
||
1205 | self.assertEqual(double_metaphone('Barker'), ('PRKR', '')) |
||
1206 | self.assertEqual(double_metaphone('Barlow'), ('PRL', 'PRLF')) |
||
1207 | self.assertEqual(double_metaphone('Barnard'), ('PRNRT', '')) |
||
1208 | self.assertEqual(double_metaphone('Barnes'), ('PRNS', '')) |
||
1209 | self.assertEqual(double_metaphone('Barnsley'), ('PRNSL', '')) |
||
1210 | self.assertEqual(double_metaphone('Barouxis'), ('PRKSS', '')) |
||
1211 | self.assertEqual(double_metaphone('Bartlet'), ('PRTLT', '')) |
||
1212 | self.assertEqual(double_metaphone('Basley'), ('PSL', '')) |
||
1213 | self.assertEqual(double_metaphone('Basset'), ('PST', '')) |
||
1214 | self.assertEqual(double_metaphone('Bassett'), ('PST', '')) |
||
1215 | self.assertEqual(double_metaphone('Batchlor'), ('PXLR', '')) |
||
1216 | self.assertEqual(double_metaphone('Bates'), ('PTS', '')) |
||
1217 | self.assertEqual(double_metaphone('Batson'), ('PTSN', '')) |
||
1218 | self.assertEqual(double_metaphone('Bayes'), ('PS', '')) |
||
1219 | self.assertEqual(double_metaphone('Bayley'), ('PL', '')) |
||
1220 | self.assertEqual(double_metaphone('Beale'), ('PL', '')) |
||
1221 | self.assertEqual(double_metaphone('Beauchamp'), ('PXMP', 'PKMP')) |
||
1222 | self.assertEqual(double_metaphone('Beauclerc'), ('PKLRK', '')) |
||
1223 | self.assertEqual(double_metaphone('Beech'), ('PK', '')) |
||
1224 | self.assertEqual(double_metaphone('Beers'), ('PRS', '')) |
||
1225 | self.assertEqual(double_metaphone('Beke'), ('PK', '')) |
||
1226 | self.assertEqual(double_metaphone('Belcher'), ('PLXR', 'PLKR')) |
||
1227 | self.assertEqual(double_metaphone('Benjamin'), ('PNJMN', '')) |
||
1228 | self.assertEqual(double_metaphone('Benningham'), ('PNNKM', '')) |
||
1229 | self.assertEqual(double_metaphone('Bereford'), ('PRFRT', '')) |
||
1230 | self.assertEqual(double_metaphone('Bergen'), ('PRJN', 'PRKN')) |
||
1231 | self.assertEqual(double_metaphone('Berkeley'), ('PRKL', '')) |
||
1232 | self.assertEqual(double_metaphone('Berry'), ('PR', '')) |
||
1233 | self.assertEqual(double_metaphone('Besse'), ('PS', '')) |
||
1234 | self.assertEqual(double_metaphone('Bessey'), ('PS', '')) |
||
1235 | self.assertEqual(double_metaphone('Bessiles'), ('PSLS', '')) |
||
1236 | self.assertEqual(double_metaphone('Bigelow'), ('PJL', 'PKLF')) |
||
1237 | self.assertEqual(double_metaphone('Bigg'), ('PK', '')) |
||
1238 | self.assertEqual(double_metaphone('Bigod'), ('PKT', '')) |
||
1239 | self.assertEqual(double_metaphone('Billings'), ('PLNKS', '')) |
||
1240 | self.assertEqual(double_metaphone('Bimper'), ('PMPR', '')) |
||
1241 | self.assertEqual(double_metaphone('Binker'), ('PNKR', '')) |
||
1242 | self.assertEqual(double_metaphone('Birdsill'), ('PRTSL', '')) |
||
1243 | self.assertEqual(double_metaphone('Bishop'), ('PXP', '')) |
||
1244 | self.assertEqual(double_metaphone('Black'), ('PLK', '')) |
||
1245 | self.assertEqual(double_metaphone('Blagge'), ('PLK', '')) |
||
1246 | self.assertEqual(double_metaphone('Blake'), ('PLK', '')) |
||
1247 | self.assertEqual(double_metaphone('Blanck'), ('PLNK', '')) |
||
1248 | self.assertEqual(double_metaphone('Bledsoe'), ('PLTS', '')) |
||
1249 | self.assertEqual(double_metaphone('Blennerhasset'), ('PLNRST', '')) |
||
1250 | self.assertEqual(double_metaphone('Blessing'), ('PLSNK', '')) |
||
1251 | self.assertEqual(double_metaphone('Blewett'), ('PLT', '')) |
||
1252 | self.assertEqual(double_metaphone('Bloctgoed'), ('PLKTKT', '')) |
||
1253 | self.assertEqual(double_metaphone('Bloetgoet'), ('PLTKT', '')) |
||
1254 | self.assertEqual(double_metaphone('Bloodgood'), ('PLTKT', '')) |
||
1255 | self.assertEqual(double_metaphone('Blossom'), ('PLSM', '')) |
||
1256 | self.assertEqual(double_metaphone('Blount'), ('PLNT', '')) |
||
1257 | self.assertEqual(double_metaphone('Bodine'), ('PTN', '')) |
||
1258 | self.assertEqual(double_metaphone('Bodman'), ('PTMN', '')) |
||
1259 | self.assertEqual(double_metaphone('BonCoeur'), ('PNKR', '')) |
||
1260 | self.assertEqual(double_metaphone('Bond'), ('PNT', '')) |
||
1261 | self.assertEqual(double_metaphone('Boscawen'), ('PSKN', '')) |
||
1262 | self.assertEqual(double_metaphone('Bosworth'), ('PSR0', 'PSRT')) |
||
1263 | self.assertEqual(double_metaphone('Bouchier'), ('PX', 'PKR')) |
||
1264 | self.assertEqual(double_metaphone('Bowne'), ('PN', '')) |
||
1265 | self.assertEqual(double_metaphone('Bradbury'), ('PRTPR', '')) |
||
1266 | self.assertEqual(double_metaphone('Bradder'), ('PRTR', '')) |
||
1267 | self.assertEqual(double_metaphone('Bradford'), ('PRTFRT', '')) |
||
1268 | self.assertEqual(double_metaphone('Bradstreet'), ('PRTSTRT', '')) |
||
1269 | self.assertEqual(double_metaphone('Braham'), ('PRHM', '')) |
||
1270 | self.assertEqual(double_metaphone('Brailsford'), ('PRLSFRT', '')) |
||
1271 | self.assertEqual(double_metaphone('Brainard'), ('PRNRT', '')) |
||
1272 | self.assertEqual(double_metaphone('Brandish'), ('PRNTX', '')) |
||
1273 | self.assertEqual(double_metaphone('Braun'), ('PRN', '')) |
||
1274 | self.assertEqual(double_metaphone('Brecc'), ('PRK', '')) |
||
1275 | self.assertEqual(double_metaphone('Brent'), ('PRNT', '')) |
||
1276 | self.assertEqual(double_metaphone('Brenton'), ('PRNTN', '')) |
||
1277 | self.assertEqual(double_metaphone('Briggs'), ('PRKS', '')) |
||
1278 | self.assertEqual(double_metaphone('Brigham'), ('PRM', '')) |
||
1279 | self.assertEqual(double_metaphone('Brobst'), ('PRPST', '')) |
||
1280 | self.assertEqual(double_metaphone('Brome'), ('PRM', '')) |
||
1281 | self.assertEqual(double_metaphone('Bronson'), ('PRNSN', '')) |
||
1282 | self.assertEqual(double_metaphone('Brooks'), ('PRKS', '')) |
||
1283 | self.assertEqual(double_metaphone('Brouillard'), ('PRLRT', '')) |
||
1284 | self.assertEqual(double_metaphone('Brown'), ('PRN', '')) |
||
1285 | self.assertEqual(double_metaphone('Browne'), ('PRN', '')) |
||
1286 | self.assertEqual(double_metaphone('Brownell'), ('PRNL', '')) |
||
1287 | self.assertEqual(double_metaphone('Bruley'), ('PRL', '')) |
||
1288 | self.assertEqual(double_metaphone('Bryant'), ('PRNT', '')) |
||
1289 | self.assertEqual(double_metaphone('Brzozowski'), |
||
1290 | ('PRSSSK', 'PRTSTSFSK')) |
||
1291 | self.assertEqual(double_metaphone('Buide'), ('PT', '')) |
||
1292 | self.assertEqual(double_metaphone('Bulmer'), ('PLMR', '')) |
||
1293 | self.assertEqual(double_metaphone('Bunker'), ('PNKR', '')) |
||
1294 | self.assertEqual(double_metaphone('Burden'), ('PRTN', '')) |
||
1295 | self.assertEqual(double_metaphone('Burge'), ('PRJ', 'PRK')) |
||
1296 | self.assertEqual(double_metaphone('Burgoyne'), ('PRKN', '')) |
||
1297 | self.assertEqual(double_metaphone('Burke'), ('PRK', '')) |
||
1298 | self.assertEqual(double_metaphone('Burnett'), ('PRNT', '')) |
||
1299 | self.assertEqual(double_metaphone('Burpee'), ('PRP', '')) |
||
1300 | self.assertEqual(double_metaphone('Bursley'), ('PRSL', '')) |
||
1301 | self.assertEqual(double_metaphone('Burton'), ('PRTN', '')) |
||
1302 | self.assertEqual(double_metaphone('Bushnell'), ('PXNL', '')) |
||
1303 | self.assertEqual(double_metaphone('Buss'), ('PS', '')) |
||
1304 | self.assertEqual(double_metaphone('Buswell'), ('PSL', '')) |
||
1305 | self.assertEqual(double_metaphone('Butler'), ('PTLR', '')) |
||
1306 | self.assertEqual(double_metaphone('Calkin'), ('KLKN', '')) |
||
1307 | self.assertEqual(double_metaphone('Canada'), ('KNT', '')) |
||
1308 | self.assertEqual(double_metaphone('Canmore'), ('KNMR', '')) |
||
1309 | self.assertEqual(double_metaphone('Canney'), ('KN', '')) |
||
1310 | self.assertEqual(double_metaphone('Capet'), ('KPT', '')) |
||
1311 | self.assertEqual(double_metaphone('Card'), ('KRT', '')) |
||
1312 | self.assertEqual(double_metaphone('Carman'), ('KRMN', '')) |
||
1313 | self.assertEqual(double_metaphone('Carpenter'), ('KRPNTR', '')) |
||
1314 | self.assertEqual(double_metaphone('Cartwright'), ('KRTRT', '')) |
||
1315 | self.assertEqual(double_metaphone('Casey'), ('KS', '')) |
||
1316 | self.assertEqual(double_metaphone('Catterfield'), ('KTRFLT', '')) |
||
1317 | self.assertEqual(double_metaphone('Ceeley'), ('SL', '')) |
||
1318 | self.assertEqual(double_metaphone('Chambers'), ('XMPRS', '')) |
||
1319 | self.assertEqual(double_metaphone('Champion'), ('XMPN', '')) |
||
1320 | self.assertEqual(double_metaphone('Chapman'), ('XPMN', '')) |
||
1321 | self.assertEqual(double_metaphone('Chase'), ('XS', '')) |
||
1322 | self.assertEqual(double_metaphone('Cheney'), ('XN', '')) |
||
1323 | self.assertEqual(double_metaphone('Chetwynd'), ('XTNT', '')) |
||
1324 | self.assertEqual(double_metaphone('Chevalier'), ('XFL', 'XFLR')) |
||
1325 | self.assertEqual(double_metaphone('Chillingsworth'), |
||
1326 | ('XLNKSR0', 'XLNKSRT')) |
||
1327 | self.assertEqual(double_metaphone('Christie'), ('KRST', '')) |
||
1328 | self.assertEqual(double_metaphone('Chubbuck'), ('XPK', '')) |
||
1329 | self.assertEqual(double_metaphone('Church'), ('XRX', 'XRK')) |
||
1330 | self.assertEqual(double_metaphone('Clark'), ('KLRK', '')) |
||
1331 | self.assertEqual(double_metaphone('Clarke'), ('KLRK', '')) |
||
1332 | self.assertEqual(double_metaphone('Cleare'), ('KLR', '')) |
||
1333 | self.assertEqual(double_metaphone('Clement'), ('KLMNT', '')) |
||
1334 | self.assertEqual(double_metaphone('Clerke'), ('KLRK', '')) |
||
1335 | self.assertEqual(double_metaphone('Clibben'), ('KLPN', '')) |
||
1336 | self.assertEqual(double_metaphone('Clifford'), ('KLFRT', '')) |
||
1337 | self.assertEqual(double_metaphone('Clivedon'), ('KLFTN', '')) |
||
1338 | self.assertEqual(double_metaphone('Close'), ('KLS', '')) |
||
1339 | self.assertEqual(double_metaphone('Clothilde'), ('KL0LT', 'KLTLT')) |
||
1340 | self.assertEqual(double_metaphone('Cobb'), ('KP', '')) |
||
1341 | self.assertEqual(double_metaphone('Coburn'), ('KPRN', '')) |
||
1342 | self.assertEqual(double_metaphone('Coburne'), ('KPRN', '')) |
||
1343 | self.assertEqual(double_metaphone('Cocke'), ('KK', '')) |
||
1344 | self.assertEqual(double_metaphone('Coffin'), ('KFN', '')) |
||
1345 | self.assertEqual(double_metaphone('Coffyn'), ('KFN', '')) |
||
1346 | self.assertEqual(double_metaphone('Colborne'), ('KLPRN', '')) |
||
1347 | self.assertEqual(double_metaphone('Colby'), ('KLP', '')) |
||
1348 | self.assertEqual(double_metaphone('Cole'), ('KL', '')) |
||
1349 | self.assertEqual(double_metaphone('Coleman'), ('KLMN', '')) |
||
1350 | self.assertEqual(double_metaphone('Collier'), ('KL', 'KLR')) |
||
1351 | self.assertEqual(double_metaphone('Compton'), ('KMPTN', '')) |
||
1352 | self.assertEqual(double_metaphone('Cone'), ('KN', '')) |
||
1353 | self.assertEqual(double_metaphone('Cook'), ('KK', '')) |
||
1354 | self.assertEqual(double_metaphone('Cooke'), ('KK', '')) |
||
1355 | self.assertEqual(double_metaphone('Cooper'), ('KPR', '')) |
||
1356 | self.assertEqual(double_metaphone('Copperthwaite'), ('KPR0T', 'KPRTT')) |
||
1357 | self.assertEqual(double_metaphone('Corbet'), ('KRPT', '')) |
||
1358 | self.assertEqual(double_metaphone('Corell'), ('KRL', '')) |
||
1359 | self.assertEqual(double_metaphone('Corey'), ('KR', '')) |
||
1360 | self.assertEqual(double_metaphone('Corlies'), ('KRLS', '')) |
||
1361 | self.assertEqual(double_metaphone('Corneliszen'), ('KRNLSN', 'KRNLXN')) |
||
1362 | self.assertEqual(double_metaphone('Cornelius'), ('KRNLS', '')) |
||
1363 | self.assertEqual(double_metaphone('Cornwallis'), ('KRNLS', '')) |
||
1364 | self.assertEqual(double_metaphone('Cosgrove'), ('KSKRF', '')) |
||
1365 | self.assertEqual(double_metaphone('Count of Brionne'), ('KNTFPRN', '')) |
||
1366 | self.assertEqual(double_metaphone('Covill'), ('KFL', '')) |
||
1367 | self.assertEqual(double_metaphone('Cowperthwaite'), ('KPR0T', 'KPRTT')) |
||
1368 | self.assertEqual(double_metaphone('Cowperwaite'), ('KPRT', '')) |
||
1369 | self.assertEqual(double_metaphone('Crane'), ('KRN', '')) |
||
1370 | self.assertEqual(double_metaphone('Creagmile'), ('KRKML', '')) |
||
1371 | self.assertEqual(double_metaphone('Crew'), ('KR', 'KRF')) |
||
1372 | self.assertEqual(double_metaphone('Crispin'), ('KRSPN', '')) |
||
1373 | self.assertEqual(double_metaphone('Crocker'), ('KRKR', '')) |
||
1374 | self.assertEqual(double_metaphone('Crockett'), ('KRKT', '')) |
||
1375 | self.assertEqual(double_metaphone('Crosby'), ('KRSP', '')) |
||
1376 | self.assertEqual(double_metaphone('Crump'), ('KRMP', '')) |
||
1377 | self.assertEqual(double_metaphone('Cunningham'), ('KNNKM', '')) |
||
1378 | self.assertEqual(double_metaphone('Curtis'), ('KRTS', '')) |
||
1379 | self.assertEqual(double_metaphone('Cutha'), ('K0', 'KT')) |
||
1380 | self.assertEqual(double_metaphone('Cutter'), ('KTR', '')) |
||
1381 | self.assertEqual(double_metaphone('D\'Aubigny'), ('TPN', 'TPKN')) |
||
1382 | self.assertEqual(double_metaphone('DAVIS'), ('TFS', '')) |
||
1383 | self.assertEqual(double_metaphone('Dabinott'), ('TPNT', '')) |
||
1384 | self.assertEqual(double_metaphone('Dacre'), ('TKR', '')) |
||
1385 | self.assertEqual(double_metaphone('Daggett'), ('TKT', '')) |
||
1386 | self.assertEqual(double_metaphone('Danvers'), ('TNFRS', '')) |
||
1387 | self.assertEqual(double_metaphone('Darcy'), ('TRS', '')) |
||
1388 | self.assertEqual(double_metaphone('Davis'), ('TFS', '')) |
||
1389 | self.assertEqual(double_metaphone('Dawn'), ('TN', '')) |
||
1390 | self.assertEqual(double_metaphone('Dawson'), ('TSN', '')) |
||
1391 | self.assertEqual(double_metaphone('Day'), ('T', '')) |
||
1392 | self.assertEqual(double_metaphone('Daye'), ('T', '')) |
||
1393 | self.assertEqual(double_metaphone('DeGrenier'), ('TKRN', 'TKRNR')) |
||
1394 | self.assertEqual(double_metaphone('Dean'), ('TN', '')) |
||
1395 | self.assertEqual(double_metaphone('Deekindaugh'), ('TKNT', '')) |
||
1396 | self.assertEqual(double_metaphone('Dennis'), ('TNS', '')) |
||
1397 | self.assertEqual(double_metaphone('Denny'), ('TN', '')) |
||
1398 | self.assertEqual(double_metaphone('Denton'), ('TNTN', '')) |
||
1399 | self.assertEqual(double_metaphone('Desborough'), ('TSPRF', '')) |
||
1400 | self.assertEqual(double_metaphone('Despenser'), ('TSPNSR', '')) |
||
1401 | self.assertEqual(double_metaphone('Deverill'), ('TFRL', '')) |
||
1402 | self.assertEqual(double_metaphone('Devine'), ('TFN', '')) |
||
1403 | self.assertEqual(double_metaphone('Dexter'), ('TKSTR', '')) |
||
1404 | self.assertEqual(double_metaphone('Dillaway'), ('TL', '')) |
||
1405 | self.assertEqual(double_metaphone('Dimmick'), ('TMK', '')) |
||
1406 | self.assertEqual(double_metaphone('Dinan'), ('TNN', '')) |
||
1407 | self.assertEqual(double_metaphone('Dix'), ('TKS', '')) |
||
1408 | self.assertEqual(double_metaphone('Doggett'), ('TKT', '')) |
||
1409 | self.assertEqual(double_metaphone('Donahue'), ('TNH', '')) |
||
1410 | self.assertEqual(double_metaphone('Dorfman'), ('TRFMN', '')) |
||
1411 | self.assertEqual(double_metaphone('Dorris'), ('TRS', '')) |
||
1412 | self.assertEqual(double_metaphone('Dow'), ('T', 'TF')) |
||
1413 | self.assertEqual(double_metaphone('Downey'), ('TN', '')) |
||
1414 | self.assertEqual(double_metaphone('Downing'), ('TNNK', '')) |
||
1415 | self.assertEqual(double_metaphone('Dowsett'), ('TST', '')) |
||
1416 | self.assertEqual(double_metaphone('Duck?'), ('TK', '')) |
||
1417 | self.assertEqual(double_metaphone('Dudley'), ('TTL', '')) |
||
1418 | self.assertEqual(double_metaphone('Duffy'), ('TF', '')) |
||
1419 | self.assertEqual(double_metaphone('Dunn'), ('TN', '')) |
||
1420 | self.assertEqual(double_metaphone('Dunsterville'), ('TNSTRFL', '')) |
||
1421 | self.assertEqual(double_metaphone('Durrant'), ('TRNT', '')) |
||
1422 | self.assertEqual(double_metaphone('Durrin'), ('TRN', '')) |
||
1423 | self.assertEqual(double_metaphone('Dustin'), ('TSTN', '')) |
||
1424 | self.assertEqual(double_metaphone('Duston'), ('TSTN', '')) |
||
1425 | self.assertEqual(double_metaphone('Eames'), ('AMS', '')) |
||
1426 | self.assertEqual(double_metaphone('Early'), ('ARL', '')) |
||
1427 | self.assertEqual(double_metaphone('Easty'), ('AST', '')) |
||
1428 | self.assertEqual(double_metaphone('Ebbett'), ('APT', '')) |
||
1429 | self.assertEqual(double_metaphone('Eberbach'), ('APRPK', '')) |
||
1430 | self.assertEqual(double_metaphone('Eberhard'), ('APRRT', '')) |
||
1431 | self.assertEqual(double_metaphone('Eddy'), ('AT', '')) |
||
1432 | self.assertEqual(double_metaphone('Edenden'), ('ATNTN', '')) |
||
1433 | self.assertEqual(double_metaphone('Edwards'), ('ATRTS', '')) |
||
1434 | self.assertEqual(double_metaphone('Eglinton'), ('AKLNTN', 'ALNTN')) |
||
1435 | self.assertEqual(double_metaphone('Eliot'), ('ALT', '')) |
||
1436 | self.assertEqual(double_metaphone('Elizabeth'), ('ALSP0', 'ALSPT')) |
||
1437 | self.assertEqual(double_metaphone('Ellis'), ('ALS', '')) |
||
1438 | self.assertEqual(double_metaphone('Ellison'), ('ALSN', '')) |
||
1439 | self.assertEqual(double_metaphone('Ellot'), ('ALT', '')) |
||
1440 | self.assertEqual(double_metaphone('Elny'), ('ALN', '')) |
||
1441 | self.assertEqual(double_metaphone('Elsner'), ('ALSNR', '')) |
||
1442 | self.assertEqual(double_metaphone('Emerson'), ('AMRSN', '')) |
||
1443 | self.assertEqual(double_metaphone('Empson'), ('AMPSN', '')) |
||
1444 | self.assertEqual(double_metaphone('Est'), ('AST', '')) |
||
1445 | self.assertEqual(double_metaphone('Estabrook'), ('ASTPRK', '')) |
||
1446 | self.assertEqual(double_metaphone('Estes'), ('ASTS', '')) |
||
1447 | self.assertEqual(double_metaphone('Estey'), ('AST', '')) |
||
1448 | self.assertEqual(double_metaphone('Evans'), ('AFNS', '')) |
||
1449 | self.assertEqual(double_metaphone('Fallowell'), ('FLL', '')) |
||
1450 | self.assertEqual(double_metaphone('Farnsworth'), ('FRNSR0', 'FRNSRT')) |
||
1451 | self.assertEqual(double_metaphone('Feake'), ('FK', '')) |
||
1452 | self.assertEqual(double_metaphone('Feke'), ('FK', '')) |
||
1453 | self.assertEqual(double_metaphone('Fellows'), ('FLS', '')) |
||
1454 | self.assertEqual(double_metaphone('Fettiplace'), ('FTPLS', '')) |
||
1455 | self.assertEqual(double_metaphone('Finney'), ('FN', '')) |
||
1456 | self.assertEqual(double_metaphone('Fischer'), ('FXR', 'FSKR')) |
||
1457 | self.assertEqual(double_metaphone('Fisher'), ('FXR', '')) |
||
1458 | self.assertEqual(double_metaphone('Fisk'), ('FSK', '')) |
||
1459 | self.assertEqual(double_metaphone('Fiske'), ('FSK', '')) |
||
1460 | self.assertEqual(double_metaphone('Fletcher'), ('FLXR', '')) |
||
1461 | self.assertEqual(double_metaphone('Folger'), ('FLKR', 'FLJR')) |
||
1462 | self.assertEqual(double_metaphone('Foliot'), ('FLT', '')) |
||
1463 | self.assertEqual(double_metaphone('Folyot'), ('FLT', '')) |
||
1464 | self.assertEqual(double_metaphone('Fones'), ('FNS', '')) |
||
1465 | self.assertEqual(double_metaphone('Fordham'), ('FRTM', '')) |
||
1466 | self.assertEqual(double_metaphone('Forstner'), ('FRSTNR', '')) |
||
1467 | self.assertEqual(double_metaphone('Fosten'), ('FSTN', '')) |
||
1468 | self.assertEqual(double_metaphone('Foster'), ('FSTR', '')) |
||
1469 | self.assertEqual(double_metaphone('Foulke'), ('FLK', '')) |
||
1470 | self.assertEqual(double_metaphone('Fowler'), ('FLR', '')) |
||
1471 | self.assertEqual(double_metaphone('Foxwell'), ('FKSL', '')) |
||
1472 | self.assertEqual(double_metaphone('Fraley'), ('FRL', '')) |
||
1473 | self.assertEqual(double_metaphone('Franceys'), ('FRNSS', '')) |
||
1474 | self.assertEqual(double_metaphone('Franke'), ('FRNK', '')) |
||
1475 | self.assertEqual(double_metaphone('Frascella'), ('FRSL', '')) |
||
1476 | self.assertEqual(double_metaphone('Frazer'), ('FRSR', '')) |
||
1477 | self.assertEqual(double_metaphone('Fredd'), ('FRT', '')) |
||
1478 | self.assertEqual(double_metaphone('Freeman'), ('FRMN', '')) |
||
1479 | self.assertEqual(double_metaphone('French'), ('FRNX', 'FRNK')) |
||
1480 | self.assertEqual(double_metaphone('Freville'), ('FRFL', '')) |
||
1481 | self.assertEqual(double_metaphone('Frey'), ('FR', '')) |
||
1482 | self.assertEqual(double_metaphone('Frick'), ('FRK', '')) |
||
1483 | self.assertEqual(double_metaphone('Frier'), ('FR', 'FRR')) |
||
1484 | self.assertEqual(double_metaphone('Froe'), ('FR', '')) |
||
1485 | self.assertEqual(double_metaphone('Frorer'), ('FRRR', '')) |
||
1486 | self.assertEqual(double_metaphone('Frost'), ('FRST', '')) |
||
1487 | self.assertEqual(double_metaphone('Frothingham'), ('FR0NKM', 'FRTNKM')) |
||
1488 | self.assertEqual(double_metaphone('Fry'), ('FR', '')) |
||
1489 | self.assertEqual(double_metaphone('Gaffney'), ('KFN', '')) |
||
1490 | self.assertEqual(double_metaphone('Gage'), ('KJ', 'KK')) |
||
1491 | self.assertEqual(double_metaphone('Gallion'), ('KLN', '')) |
||
1492 | self.assertEqual(double_metaphone('Gallishan'), ('KLXN', '')) |
||
1493 | self.assertEqual(double_metaphone('Gamble'), ('KMPL', '')) |
||
1494 | self.assertEqual(double_metaphone('Garbrand'), ('KRPRNT', '')) |
||
1495 | self.assertEqual(double_metaphone('Gardner'), ('KRTNR', '')) |
||
1496 | self.assertEqual(double_metaphone('Garrett'), ('KRT', '')) |
||
1497 | self.assertEqual(double_metaphone('Gassner'), ('KSNR', '')) |
||
1498 | self.assertEqual(double_metaphone('Gater'), ('KTR', '')) |
||
1499 | self.assertEqual(double_metaphone('Gaunt'), ('KNT', '')) |
||
1500 | self.assertEqual(double_metaphone('Gayer'), ('KR', '')) |
||
1501 | self.assertEqual(double_metaphone('Gerken'), ('KRKN', 'JRKN')) |
||
1502 | self.assertEqual(double_metaphone('Gerritsen'), ('KRTSN', 'JRTSN')) |
||
1503 | self.assertEqual(double_metaphone('Gibbs'), ('KPS', 'JPS')) |
||
1504 | self.assertEqual(double_metaphone('Giffard'), ('JFRT', 'KFRT')) |
||
1505 | self.assertEqual(double_metaphone('Gilbert'), ('KLPRT', 'JLPRT')) |
||
1506 | self.assertEqual(double_metaphone('Gill'), ('KL', 'JL')) |
||
1507 | self.assertEqual(double_metaphone('Gilman'), ('KLMN', 'JLMN')) |
||
1508 | self.assertEqual(double_metaphone('Glass'), ('KLS', '')) |
||
1509 | self.assertEqual(double_metaphone('GoddardGifford'), ('KTRJFRT', '')) |
||
1510 | self.assertEqual(double_metaphone('Godfrey'), ('KTFR', '')) |
||
1511 | self.assertEqual(double_metaphone('Godwin'), ('KTN', '')) |
||
1512 | self.assertEqual(double_metaphone('Goodale'), ('KTL', '')) |
||
1513 | self.assertEqual(double_metaphone('Goodnow'), ('KTN', 'KTNF')) |
||
1514 | self.assertEqual(double_metaphone('Gorham'), ('KRM', '')) |
||
1515 | self.assertEqual(double_metaphone('Goseline'), ('KSLN', '')) |
||
1516 | self.assertEqual(double_metaphone('Gott'), ('KT', '')) |
||
1517 | self.assertEqual(double_metaphone('Gould'), ('KLT', '')) |
||
1518 | self.assertEqual(double_metaphone('Grafton'), ('KRFTN', '')) |
||
1519 | self.assertEqual(double_metaphone('Grant'), ('KRNT', '')) |
||
1520 | self.assertEqual(double_metaphone('Gray'), ('KR', '')) |
||
1521 | self.assertEqual(double_metaphone('Green'), ('KRN', '')) |
||
1522 | self.assertEqual(double_metaphone('Griffin'), ('KRFN', '')) |
||
1523 | self.assertEqual(double_metaphone('Grill'), ('KRL', '')) |
||
1524 | self.assertEqual(double_metaphone('Grim'), ('KRM', '')) |
||
1525 | self.assertEqual(double_metaphone('Grisgonelle'), ('KRSKNL', '')) |
||
1526 | self.assertEqual(double_metaphone('Gross'), ('KRS', '')) |
||
1527 | self.assertEqual(double_metaphone('Guba'), ('KP', '')) |
||
1528 | self.assertEqual(double_metaphone('Gybbes'), ('KPS', 'JPS')) |
||
1529 | self.assertEqual(double_metaphone('Haburne'), ('HPRN', '')) |
||
1530 | self.assertEqual(double_metaphone('Hackburne'), ('HKPRN', '')) |
||
1531 | self.assertEqual(double_metaphone('Haddon?'), ('HTN', '')) |
||
1532 | self.assertEqual(double_metaphone('Haines'), ('HNS', '')) |
||
1533 | self.assertEqual(double_metaphone('Hale'), ('HL', '')) |
||
1534 | self.assertEqual(double_metaphone('Hall'), ('HL', '')) |
||
1535 | self.assertEqual(double_metaphone('Hallet'), ('HLT', '')) |
||
1536 | self.assertEqual(double_metaphone('Hallock'), ('HLK', '')) |
||
1537 | self.assertEqual(double_metaphone('Halstead'), ('HLSTT', '')) |
||
1538 | self.assertEqual(double_metaphone('Hammond'), ('HMNT', '')) |
||
1539 | self.assertEqual(double_metaphone('Hance'), ('HNS', '')) |
||
1540 | self.assertEqual(double_metaphone('Handy'), ('HNT', '')) |
||
1541 | self.assertEqual(double_metaphone('Hanson'), ('HNSN', '')) |
||
1542 | self.assertEqual(double_metaphone('Harasek'), ('HRSK', '')) |
||
1543 | self.assertEqual(double_metaphone('Harcourt'), ('HRKRT', '')) |
||
1544 | self.assertEqual(double_metaphone('Hardy'), ('HRT', '')) |
||
1545 | self.assertEqual(double_metaphone('Harlock'), ('HRLK', '')) |
||
1546 | self.assertEqual(double_metaphone('Harris'), ('HRS', '')) |
||
1547 | self.assertEqual(double_metaphone('Hartley'), ('HRTL', '')) |
||
1548 | self.assertEqual(double_metaphone('Harvey'), ('HRF', '')) |
||
1549 | self.assertEqual(double_metaphone('Harvie'), ('HRF', '')) |
||
1550 | self.assertEqual(double_metaphone('Harwood'), ('HRT', '')) |
||
1551 | self.assertEqual(double_metaphone('Hathaway'), ('H0', 'HT')) |
||
1552 | self.assertEqual(double_metaphone('Haukeness'), ('HKNS', '')) |
||
1553 | self.assertEqual(double_metaphone('Hawkes'), ('HKS', '')) |
||
1554 | self.assertEqual(double_metaphone('Hawkhurst'), ('HKRST', '')) |
||
1555 | self.assertEqual(double_metaphone('Hawkins'), ('HKNS', '')) |
||
1556 | self.assertEqual(double_metaphone('Hawley'), ('HL', '')) |
||
1557 | self.assertEqual(double_metaphone('Heald'), ('HLT', '')) |
||
1558 | self.assertEqual(double_metaphone('Helsdon'), ('HLSTN', '')) |
||
1559 | self.assertEqual(double_metaphone('Hemenway'), ('HMN', '')) |
||
1560 | self.assertEqual(double_metaphone('Hemmenway'), ('HMN', '')) |
||
1561 | self.assertEqual(double_metaphone('Henck'), ('HNK', '')) |
||
1562 | self.assertEqual(double_metaphone('Henderson'), ('HNTRSN', '')) |
||
1563 | self.assertEqual(double_metaphone('Hendricks'), ('HNTRKS', '')) |
||
1564 | self.assertEqual(double_metaphone('Hersey'), ('HRS', '')) |
||
1565 | self.assertEqual(double_metaphone('Hewes'), ('HS', '')) |
||
1566 | self.assertEqual(double_metaphone('Heyman'), ('HMN', '')) |
||
1567 | self.assertEqual(double_metaphone('Hicks'), ('HKS', '')) |
||
1568 | self.assertEqual(double_metaphone('Hidden'), ('HTN', '')) |
||
1569 | self.assertEqual(double_metaphone('Higgs'), ('HKS', '')) |
||
1570 | self.assertEqual(double_metaphone('Hill'), ('HL', '')) |
||
1571 | self.assertEqual(double_metaphone('Hills'), ('HLS', '')) |
||
1572 | self.assertEqual(double_metaphone('Hinckley'), ('HNKL', '')) |
||
1573 | self.assertEqual(double_metaphone('Hipwell'), ('HPL', '')) |
||
1574 | self.assertEqual(double_metaphone('Hobart'), ('HPRT', '')) |
||
1575 | self.assertEqual(double_metaphone('Hoben'), ('HPN', '')) |
||
1576 | self.assertEqual(double_metaphone('Hoffmann'), ('HFMN', '')) |
||
1577 | self.assertEqual(double_metaphone('Hogan'), ('HKN', '')) |
||
1578 | self.assertEqual(double_metaphone('Holmes'), ('HLMS', '')) |
||
1579 | self.assertEqual(double_metaphone('Hoo'), ('H', '')) |
||
1580 | self.assertEqual(double_metaphone('Hooker'), ('HKR', '')) |
||
1581 | self.assertEqual(double_metaphone('Hopcott'), ('HPKT', '')) |
||
1582 | self.assertEqual(double_metaphone('Hopkins'), ('HPKNS', '')) |
||
1583 | self.assertEqual(double_metaphone('Hopkinson'), ('HPKNSN', '')) |
||
1584 | self.assertEqual(double_metaphone('Hornsey'), ('HRNS', '')) |
||
1585 | self.assertEqual(double_metaphone('Houckgeest'), ('HKJST', 'HKKST')) |
||
1586 | self.assertEqual(double_metaphone('Hough'), ('H', '')) |
||
1587 | self.assertEqual(double_metaphone('Houstin'), ('HSTN', '')) |
||
1588 | self.assertEqual(double_metaphone('How'), ('H', 'HF')) |
||
1589 | self.assertEqual(double_metaphone('Howe'), ('H', '')) |
||
1590 | self.assertEqual(double_metaphone('Howland'), ('HLNT', '')) |
||
1591 | self.assertEqual(double_metaphone('Hubner'), ('HPNR', '')) |
||
1592 | self.assertEqual(double_metaphone('Hudnut'), ('HTNT', '')) |
||
1593 | self.assertEqual(double_metaphone('Hughes'), ('HS', '')) |
||
1594 | self.assertEqual(double_metaphone('Hull'), ('HL', '')) |
||
1595 | self.assertEqual(double_metaphone('Hulme'), ('HLM', '')) |
||
1596 | self.assertEqual(double_metaphone('Hume'), ('HM', '')) |
||
1597 | self.assertEqual(double_metaphone('Hundertumark'), ('HNTRTMRK', '')) |
||
1598 | self.assertEqual(double_metaphone('Hundley'), ('HNTL', '')) |
||
1599 | self.assertEqual(double_metaphone('Hungerford'), |
||
1600 | ('HNKRFRT', 'HNJRFRT')) |
||
1601 | self.assertEqual(double_metaphone('Hunt'), ('HNT', '')) |
||
1602 | self.assertEqual(double_metaphone('Hurst'), ('HRST', '')) |
||
1603 | self.assertEqual(double_metaphone('Husbands'), ('HSPNTS', '')) |
||
1604 | self.assertEqual(double_metaphone('Hussey'), ('HS', '')) |
||
1605 | self.assertEqual(double_metaphone('Husted'), ('HSTT', '')) |
||
1606 | self.assertEqual(double_metaphone('Hutchins'), ('HXNS', '')) |
||
1607 | self.assertEqual(double_metaphone('Hutchinson'), ('HXNSN', '')) |
||
1608 | self.assertEqual(double_metaphone('Huttinger'), ('HTNKR', 'HTNJR')) |
||
1609 | self.assertEqual(double_metaphone('Huybertsen'), ('HPRTSN', '')) |
||
1610 | self.assertEqual(double_metaphone('Iddenden'), ('ATNTN', '')) |
||
1611 | self.assertEqual(double_metaphone('Ingraham'), ('ANKRHM', '')) |
||
1612 | self.assertEqual(double_metaphone('Ives'), ('AFS', '')) |
||
1613 | self.assertEqual(double_metaphone('Jackson'), ('JKSN', 'AKSN')) |
||
1614 | self.assertEqual(double_metaphone('Jacob'), ('JKP', 'AKP')) |
||
1615 | self.assertEqual(double_metaphone('Jans'), ('JNS', 'ANS')) |
||
1616 | self.assertEqual(double_metaphone('Jenkins'), ('JNKNS', 'ANKNS')) |
||
1617 | self.assertEqual(double_metaphone('Jewett'), ('JT', 'AT')) |
||
1618 | self.assertEqual(double_metaphone('Jewitt'), ('JT', 'AT')) |
||
1619 | self.assertEqual(double_metaphone('Johnson'), ('JNSN', 'ANSN')) |
||
1620 | self.assertEqual(double_metaphone('Jones'), ('JNS', 'ANS')) |
||
1621 | self.assertEqual(double_metaphone('Josephine'), ('JSFN', 'HSFN')) |
||
1622 | self.assertEqual(double_metaphone('Judd'), ('JT', 'AT')) |
||
1623 | self.assertEqual(double_metaphone('June'), ('JN', 'AN')) |
||
1624 | self.assertEqual(double_metaphone('Kamarowska'), ('KMRSK', '')) |
||
1625 | self.assertEqual(double_metaphone('Kay'), ('K', '')) |
||
1626 | self.assertEqual(double_metaphone('Kelley'), ('KL', '')) |
||
1627 | self.assertEqual(double_metaphone('Kelly'), ('KL', '')) |
||
1628 | self.assertEqual(double_metaphone('Keymber'), ('KMPR', '')) |
||
1629 | self.assertEqual(double_metaphone('Keynes'), ('KNS', '')) |
||
1630 | self.assertEqual(double_metaphone('Kilham'), ('KLM', '')) |
||
1631 | self.assertEqual(double_metaphone('Kim'), ('KM', '')) |
||
1632 | self.assertEqual(double_metaphone('Kimball'), ('KMPL', '')) |
||
1633 | self.assertEqual(double_metaphone('King'), ('KNK', '')) |
||
1634 | self.assertEqual(double_metaphone('Kinsey'), ('KNS', '')) |
||
1635 | self.assertEqual(double_metaphone('Kirk'), ('KRK', '')) |
||
1636 | self.assertEqual(double_metaphone('Kirton'), ('KRTN', '')) |
||
1637 | self.assertEqual(double_metaphone('Kistler'), ('KSTLR', '')) |
||
1638 | self.assertEqual(double_metaphone('Kitchen'), ('KXN', '')) |
||
1639 | self.assertEqual(double_metaphone('Kitson'), ('KTSN', '')) |
||
1640 | self.assertEqual(double_metaphone('Klett'), ('KLT', '')) |
||
1641 | self.assertEqual(double_metaphone('Kline'), ('KLN', '')) |
||
1642 | self.assertEqual(double_metaphone('Knapp'), ('NP', '')) |
||
1643 | self.assertEqual(double_metaphone('Knight'), ('NT', '')) |
||
1644 | self.assertEqual(double_metaphone('Knote'), ('NT', '')) |
||
1645 | self.assertEqual(double_metaphone('Knott'), ('NT', '')) |
||
1646 | self.assertEqual(double_metaphone('Knox'), ('NKS', '')) |
||
1647 | self.assertEqual(double_metaphone('Koeller'), ('KLR', '')) |
||
1648 | self.assertEqual(double_metaphone('La Pointe'), ('LPNT', '')) |
||
1649 | self.assertEqual(double_metaphone('LaPlante'), ('LPLNT', '')) |
||
1650 | self.assertEqual(double_metaphone('Laimbeer'), ('LMPR', '')) |
||
1651 | self.assertEqual(double_metaphone('Lamb'), ('LMP', '')) |
||
1652 | self.assertEqual(double_metaphone('Lambertson'), ('LMPRTSN', '')) |
||
1653 | self.assertEqual(double_metaphone('Lancto'), ('LNKT', '')) |
||
1654 | self.assertEqual(double_metaphone('Landry'), ('LNTR', '')) |
||
1655 | self.assertEqual(double_metaphone('Lane'), ('LN', '')) |
||
1656 | self.assertEqual(double_metaphone('Langendyck'), ('LNJNTK', 'LNKNTK')) |
||
1657 | self.assertEqual(double_metaphone('Langer'), ('LNKR', 'LNJR')) |
||
1658 | self.assertEqual(double_metaphone('Langford'), ('LNKFRT', '')) |
||
1659 | self.assertEqual(double_metaphone('Lantersee'), ('LNTRS', '')) |
||
1660 | self.assertEqual(double_metaphone('Laquer'), ('LKR', '')) |
||
1661 | self.assertEqual(double_metaphone('Larkin'), ('LRKN', '')) |
||
1662 | self.assertEqual(double_metaphone('Latham'), ('LTM', '')) |
||
1663 | self.assertEqual(double_metaphone('Lathrop'), ('L0RP', 'LTRP')) |
||
1664 | self.assertEqual(double_metaphone('Lauter'), ('LTR', '')) |
||
1665 | self.assertEqual(double_metaphone('Lawrence'), ('LRNS', '')) |
||
1666 | self.assertEqual(double_metaphone('Leach'), ('LK', '')) |
||
1667 | self.assertEqual(double_metaphone('Leager'), ('LKR', 'LJR')) |
||
1668 | self.assertEqual(double_metaphone('Learned'), ('LRNT', '')) |
||
1669 | self.assertEqual(double_metaphone('Leavitt'), ('LFT', '')) |
||
1670 | self.assertEqual(double_metaphone('Lee'), ('L', '')) |
||
1671 | self.assertEqual(double_metaphone('Leete'), ('LT', '')) |
||
1672 | self.assertEqual(double_metaphone('Leggett'), ('LKT', '')) |
||
1673 | self.assertEqual(double_metaphone('Leland'), ('LLNT', '')) |
||
1674 | self.assertEqual(double_metaphone('Leonard'), ('LNRT', '')) |
||
1675 | self.assertEqual(double_metaphone('Lester'), ('LSTR', '')) |
||
1676 | self.assertEqual(double_metaphone('Lestrange'), ('LSTRNJ', 'LSTRNK')) |
||
1677 | self.assertEqual(double_metaphone('Lethem'), ('L0M', 'LTM')) |
||
1678 | self.assertEqual(double_metaphone('Levine'), ('LFN', '')) |
||
1679 | self.assertEqual(double_metaphone('Lewes'), ('LS', '')) |
||
1680 | self.assertEqual(double_metaphone('Lewis'), ('LS', '')) |
||
1681 | self.assertEqual(double_metaphone('Lincoln'), ('LNKLN', '')) |
||
1682 | self.assertEqual(double_metaphone('Lindsey'), ('LNTS', '')) |
||
1683 | self.assertEqual(double_metaphone('Linher'), ('LNR', '')) |
||
1684 | self.assertEqual(double_metaphone('Lippet'), ('LPT', '')) |
||
1685 | self.assertEqual(double_metaphone('Lippincott'), ('LPNKT', '')) |
||
1686 | self.assertEqual(double_metaphone('Lockwood'), ('LKT', '')) |
||
1687 | self.assertEqual(double_metaphone('Loines'), ('LNS', '')) |
||
1688 | self.assertEqual(double_metaphone('Lombard'), ('LMPRT', '')) |
||
1689 | self.assertEqual(double_metaphone('Long'), ('LNK', '')) |
||
1690 | self.assertEqual(double_metaphone('Longespee'), ('LNJSP', 'LNKSP')) |
||
1691 | self.assertEqual(double_metaphone('Look'), ('LK', '')) |
||
1692 | self.assertEqual(double_metaphone('Lounsberry'), ('LNSPR', '')) |
||
1693 | self.assertEqual(double_metaphone('Lounsbury'), ('LNSPR', '')) |
||
1694 | self.assertEqual(double_metaphone('Louthe'), ('L0', 'LT')) |
||
1695 | self.assertEqual(double_metaphone('Loveyne'), ('LFN', '')) |
||
1696 | self.assertEqual(double_metaphone('Lowe'), ('L', '')) |
||
1697 | self.assertEqual(double_metaphone('Ludlam'), ('LTLM', '')) |
||
1698 | self.assertEqual(double_metaphone('Lumbard'), ('LMPRT', '')) |
||
1699 | self.assertEqual(double_metaphone('Lund'), ('LNT', '')) |
||
1700 | self.assertEqual(double_metaphone('Luno'), ('LN', '')) |
||
1701 | self.assertEqual(double_metaphone('Lutz'), ('LTS', '')) |
||
1702 | self.assertEqual(double_metaphone('Lydia'), ('LT', '')) |
||
1703 | self.assertEqual(double_metaphone('Lynne'), ('LN', '')) |
||
1704 | self.assertEqual(double_metaphone('Lyon'), ('LN', '')) |
||
1705 | self.assertEqual(double_metaphone('MacAlpin'), ('MKLPN', '')) |
||
1706 | self.assertEqual(double_metaphone('MacBricc'), ('MKPRK', '')) |
||
1707 | self.assertEqual(double_metaphone('MacCrinan'), ('MKRNN', '')) |
||
1708 | self.assertEqual(double_metaphone('MacKenneth'), ('MKN0', 'MKNT')) |
||
1709 | self.assertEqual(double_metaphone('MacMael nam Bo'), ('MKMLNMP', '')) |
||
1710 | self.assertEqual(double_metaphone('MacMurchada'), ('MKMRXT', 'MKMRKT')) |
||
1711 | self.assertEqual(double_metaphone('Macomber'), ('MKMPR', '')) |
||
1712 | self.assertEqual(double_metaphone('Macy'), ('MS', '')) |
||
1713 | self.assertEqual(double_metaphone('Magnus'), ('MNS', 'MKNS')) |
||
1714 | self.assertEqual(double_metaphone('Mahien'), ('MHN', '')) |
||
1715 | self.assertEqual(double_metaphone('Malmains'), ('MLMNS', '')) |
||
1716 | self.assertEqual(double_metaphone('Malory'), ('MLR', '')) |
||
1717 | self.assertEqual(double_metaphone('Mancinelli'), ('MNSNL', '')) |
||
1718 | self.assertEqual(double_metaphone('Mancini'), ('MNSN', '')) |
||
1719 | self.assertEqual(double_metaphone('Mann'), ('MN', '')) |
||
1720 | self.assertEqual(double_metaphone('Manning'), ('MNNK', '')) |
||
1721 | self.assertEqual(double_metaphone('Manter'), ('MNTR', '')) |
||
1722 | self.assertEqual(double_metaphone('Marion'), ('MRN', '')) |
||
1723 | self.assertEqual(double_metaphone('Marley'), ('MRL', '')) |
||
1724 | self.assertEqual(double_metaphone('Marmion'), ('MRMN', '')) |
||
1725 | self.assertEqual(double_metaphone('Marquart'), ('MRKRT', '')) |
||
1726 | self.assertEqual(double_metaphone('Marsh'), ('MRX', '')) |
||
1727 | self.assertEqual(double_metaphone('Marshal'), ('MRXL', '')) |
||
1728 | self.assertEqual(double_metaphone('Marshall'), ('MRXL', '')) |
||
1729 | self.assertEqual(double_metaphone('Martel'), ('MRTL', '')) |
||
1730 | self.assertEqual(double_metaphone('Martha'), ('MR0', 'MRT')) |
||
1731 | self.assertEqual(double_metaphone('Martin'), ('MRTN', '')) |
||
1732 | self.assertEqual(double_metaphone('Marturano'), ('MRTRN', '')) |
||
1733 | self.assertEqual(double_metaphone('Marvin'), ('MRFN', '')) |
||
1734 | self.assertEqual(double_metaphone('Mary'), ('MR', '')) |
||
1735 | self.assertEqual(double_metaphone('Mason'), ('MSN', '')) |
||
1736 | self.assertEqual(double_metaphone('Maxwell'), ('MKSL', '')) |
||
1737 | self.assertEqual(double_metaphone('Mayhew'), ('MH', 'MHF')) |
||
1738 | self.assertEqual(double_metaphone('McAllaster'), ('MKLSTR', '')) |
||
1739 | self.assertEqual(double_metaphone('McAllister'), ('MKLSTR', '')) |
||
1740 | self.assertEqual(double_metaphone('McConnell'), ('MKNL', '')) |
||
1741 | self.assertEqual(double_metaphone('McFarland'), ('MKFRLNT', '')) |
||
1742 | self.assertEqual(double_metaphone('McIlroy'), ('MSLR', '')) |
||
1743 | self.assertEqual(double_metaphone('McNair'), ('MKNR', '')) |
||
1744 | self.assertEqual(double_metaphone('McNair-Landry'), ('MKNRLNTR', '')) |
||
1745 | self.assertEqual(double_metaphone('McRaven'), ('MKRFN', '')) |
||
1746 | self.assertEqual(double_metaphone('Mead'), ('MT', '')) |
||
1747 | self.assertEqual(double_metaphone('Meade'), ('MT', '')) |
||
1748 | self.assertEqual(double_metaphone('Meck'), ('MK', '')) |
||
1749 | self.assertEqual(double_metaphone('Melton'), ('MLTN', '')) |
||
1750 | self.assertEqual(double_metaphone('Mendenhall'), ('MNTNL', '')) |
||
1751 | self.assertEqual(double_metaphone('Mering'), ('MRNK', '')) |
||
1752 | self.assertEqual(double_metaphone('Merrick'), ('MRK', '')) |
||
1753 | self.assertEqual(double_metaphone('Merry'), ('MR', '')) |
||
1754 | self.assertEqual(double_metaphone('Mighill'), ('ML', '')) |
||
1755 | self.assertEqual(double_metaphone('Miller'), ('MLR', '')) |
||
1756 | self.assertEqual(double_metaphone('Milton'), ('MLTN', '')) |
||
1757 | self.assertEqual(double_metaphone('Mohun'), ('MHN', '')) |
||
1758 | self.assertEqual(double_metaphone('Montague'), ('MNTK', '')) |
||
1759 | self.assertEqual(double_metaphone('Montboucher'), ('MNTPXR', 'MNTPKR')) |
||
1760 | self.assertEqual(double_metaphone('Moore'), ('MR', '')) |
||
1761 | self.assertEqual(double_metaphone('Morrel'), ('MRL', '')) |
||
1762 | self.assertEqual(double_metaphone('Morrill'), ('MRL', '')) |
||
1763 | self.assertEqual(double_metaphone('Morris'), ('MRS', '')) |
||
1764 | self.assertEqual(double_metaphone('Morton'), ('MRTN', '')) |
||
1765 | self.assertEqual(double_metaphone('Moton'), ('MTN', '')) |
||
1766 | self.assertEqual(double_metaphone('Muir'), ('MR', '')) |
||
1767 | self.assertEqual(double_metaphone('Mulferd'), ('MLFRT', '')) |
||
1768 | self.assertEqual(double_metaphone('Mullins'), ('MLNS', '')) |
||
1769 | self.assertEqual(double_metaphone('Mulso'), ('MLS', '')) |
||
1770 | self.assertEqual(double_metaphone('Munger'), ('MNKR', 'MNJR')) |
||
1771 | self.assertEqual(double_metaphone('Munt'), ('MNT', '')) |
||
1772 | self.assertEqual(double_metaphone('Murchad'), ('MRXT', 'MRKT')) |
||
1773 | self.assertEqual(double_metaphone('Murdock'), ('MRTK', '')) |
||
1774 | self.assertEqual(double_metaphone('Murray'), ('MR', '')) |
||
1775 | self.assertEqual(double_metaphone('Muskett'), ('MSKT', '')) |
||
1776 | self.assertEqual(double_metaphone('Myers'), ('MRS', '')) |
||
1777 | self.assertEqual(double_metaphone('Myrick'), ('MRK', '')) |
||
1778 | self.assertEqual(double_metaphone('NORRIS'), ('NRS', '')) |
||
1779 | self.assertEqual(double_metaphone('Nayle'), ('NL', '')) |
||
1780 | self.assertEqual(double_metaphone('Newcomb'), ('NKMP', '')) |
||
1781 | self.assertEqual(double_metaphone('Newcomb(e)'), ('NKMP', '')) |
||
1782 | self.assertEqual(double_metaphone('Newkirk'), ('NKRK', '')) |
||
1783 | self.assertEqual(double_metaphone('Newton'), ('NTN', '')) |
||
1784 | self.assertEqual(double_metaphone('Niles'), ('NLS', '')) |
||
1785 | self.assertEqual(double_metaphone('Noble'), ('NPL', '')) |
||
1786 | self.assertEqual(double_metaphone('Noel'), ('NL', '')) |
||
1787 | self.assertEqual(double_metaphone('Northend'), ('NR0NT', 'NRTNT')) |
||
1788 | self.assertEqual(double_metaphone('Norton'), ('NRTN', '')) |
||
1789 | self.assertEqual(double_metaphone('Nutter'), ('NTR', '')) |
||
1790 | self.assertEqual(double_metaphone('Odding'), ('ATNK', '')) |
||
1791 | self.assertEqual(double_metaphone('Odenbaugh'), ('ATNP', '')) |
||
1792 | self.assertEqual(double_metaphone('Ogborn'), ('AKPRN', '')) |
||
1793 | self.assertEqual(double_metaphone('Oppenheimer'), ('APNMR', '')) |
||
1794 | self.assertEqual(double_metaphone('Otis'), ('ATS', '')) |
||
1795 | self.assertEqual(double_metaphone('Oviatt'), ('AFT', '')) |
||
1796 | self.assertEqual(double_metaphone('PRUST?'), ('PRST', '')) |
||
1797 | self.assertEqual(double_metaphone('Paddock'), ('PTK', '')) |
||
1798 | self.assertEqual(double_metaphone('Page'), ('PJ', 'PK')) |
||
1799 | self.assertEqual(double_metaphone('Paine'), ('PN', '')) |
||
1800 | self.assertEqual(double_metaphone('Paist'), ('PST', '')) |
||
1801 | self.assertEqual(double_metaphone('Palmer'), ('PLMR', '')) |
||
1802 | self.assertEqual(double_metaphone('Park'), ('PRK', '')) |
||
1803 | self.assertEqual(double_metaphone('Parker'), ('PRKR', '')) |
||
1804 | self.assertEqual(double_metaphone('Parkhurst'), ('PRKRST', '')) |
||
1805 | self.assertEqual(double_metaphone('Parrat'), ('PRT', '')) |
||
1806 | self.assertEqual(double_metaphone('Parsons'), ('PRSNS', '')) |
||
1807 | self.assertEqual(double_metaphone('Partridge'), ('PRTRJ', '')) |
||
1808 | self.assertEqual(double_metaphone('Pashley'), ('PXL', '')) |
||
1809 | self.assertEqual(double_metaphone('Pasley'), ('PSL', '')) |
||
1810 | self.assertEqual(double_metaphone('Patrick'), ('PTRK', '')) |
||
1811 | self.assertEqual(double_metaphone('Pattee'), ('PT', '')) |
||
1812 | self.assertEqual(double_metaphone('Patten'), ('PTN', '')) |
||
1813 | self.assertEqual(double_metaphone('Pawley'), ('PL', '')) |
||
1814 | self.assertEqual(double_metaphone('Payne'), ('PN', '')) |
||
1815 | self.assertEqual(double_metaphone('Peabody'), ('PPT', '')) |
||
1816 | self.assertEqual(double_metaphone('Peake'), ('PK', '')) |
||
1817 | self.assertEqual(double_metaphone('Pearson'), ('PRSN', '')) |
||
1818 | self.assertEqual(double_metaphone('Peat'), ('PT', '')) |
||
1819 | self.assertEqual(double_metaphone('Pedersen'), ('PTRSN', '')) |
||
1820 | self.assertEqual(double_metaphone('Percy'), ('PRS', '')) |
||
1821 | self.assertEqual(double_metaphone('Perkins'), ('PRKNS', '')) |
||
1822 | self.assertEqual(double_metaphone('Perrine'), ('PRN', '')) |
||
1823 | self.assertEqual(double_metaphone('Perry'), ('PR', '')) |
||
1824 | self.assertEqual(double_metaphone('Peson'), ('PSN', '')) |
||
1825 | self.assertEqual(double_metaphone('Peterson'), ('PTRSN', '')) |
||
1826 | self.assertEqual(double_metaphone('Peyton'), ('PTN', '')) |
||
1827 | self.assertEqual(double_metaphone('Phinney'), ('FN', '')) |
||
1828 | self.assertEqual(double_metaphone('Pickard'), ('PKRT', '')) |
||
1829 | self.assertEqual(double_metaphone('Pierce'), ('PRS', '')) |
||
1830 | self.assertEqual(double_metaphone('Pierrepont'), ('PRPNT', '')) |
||
1831 | self.assertEqual(double_metaphone('Pike'), ('PK', '')) |
||
1832 | self.assertEqual(double_metaphone('Pinkham'), ('PNKM', '')) |
||
1833 | self.assertEqual(double_metaphone('Pitman'), ('PTMN', '')) |
||
1834 | self.assertEqual(double_metaphone('Pitt'), ('PT', '')) |
||
1835 | self.assertEqual(double_metaphone('Pitts'), ('PTS', '')) |
||
1836 | self.assertEqual(double_metaphone('Plantagenet'), |
||
1837 | ('PLNTJNT', 'PLNTKNT')) |
||
1838 | self.assertEqual(double_metaphone('Platt'), ('PLT', '')) |
||
1839 | self.assertEqual(double_metaphone('Platts'), ('PLTS', '')) |
||
1840 | self.assertEqual(double_metaphone('Pleis'), ('PLS', '')) |
||
1841 | self.assertEqual(double_metaphone('Pleiss'), ('PLS', '')) |
||
1842 | self.assertEqual(double_metaphone('Plisko'), ('PLSK', '')) |
||
1843 | self.assertEqual(double_metaphone('Pliskovitch'), ('PLSKFX', '')) |
||
1844 | self.assertEqual(double_metaphone('Plum'), ('PLM', '')) |
||
1845 | self.assertEqual(double_metaphone('Plume'), ('PLM', '')) |
||
1846 | self.assertEqual(double_metaphone('Poitou'), ('PT', '')) |
||
1847 | self.assertEqual(double_metaphone('Pomeroy'), ('PMR', '')) |
||
1848 | self.assertEqual(double_metaphone('Poretiers'), ('PRTRS', '')) |
||
1849 | self.assertEqual(double_metaphone('Pote'), ('PT', '')) |
||
1850 | self.assertEqual(double_metaphone('Potter'), ('PTR', '')) |
||
1851 | self.assertEqual(double_metaphone('Potts'), ('PTS', '')) |
||
1852 | self.assertEqual(double_metaphone('Powell'), ('PL', '')) |
||
1853 | self.assertEqual(double_metaphone('Pratt'), ('PRT', '')) |
||
1854 | self.assertEqual(double_metaphone('Presbury'), ('PRSPR', '')) |
||
1855 | self.assertEqual(double_metaphone('Priest'), ('PRST', '')) |
||
1856 | self.assertEqual(double_metaphone('Prindle'), ('PRNTL', '')) |
||
1857 | self.assertEqual(double_metaphone('Prior'), ('PRR', '')) |
||
1858 | self.assertEqual(double_metaphone('Profumo'), ('PRFM', '')) |
||
1859 | self.assertEqual(double_metaphone('Purdy'), ('PRT', '')) |
||
1860 | self.assertEqual(double_metaphone('Purefoy'), ('PRF', '')) |
||
1861 | self.assertEqual(double_metaphone('Pury'), ('PR', '')) |
||
1862 | self.assertEqual(double_metaphone('Quinter'), ('KNTR', '')) |
||
1863 | self.assertEqual(double_metaphone('Rachel'), ('RXL', 'RKL')) |
||
1864 | self.assertEqual(double_metaphone('Rand'), ('RNT', '')) |
||
1865 | self.assertEqual(double_metaphone('Rankin'), ('RNKN', '')) |
||
1866 | self.assertEqual(double_metaphone('Ravenscroft'), ('RFNSKFT', '')) |
||
1867 | self.assertEqual(double_metaphone('Raynsford'), ('RNSFRT', '')) |
||
1868 | self.assertEqual(double_metaphone('Reakirt'), ('RKRT', '')) |
||
1869 | self.assertEqual(double_metaphone('Reaves'), ('RFS', '')) |
||
1870 | self.assertEqual(double_metaphone('Reeves'), ('RFS', '')) |
||
1871 | self.assertEqual(double_metaphone('Reichert'), ('RXRT', 'RKRT')) |
||
1872 | self.assertEqual(double_metaphone('Remmele'), ('RML', '')) |
||
1873 | self.assertEqual(double_metaphone('Reynolds'), ('RNLTS', '')) |
||
1874 | self.assertEqual(double_metaphone('Rhodes'), ('RTS', '')) |
||
1875 | self.assertEqual(double_metaphone('Richards'), ('RXRTS', 'RKRTS')) |
||
1876 | self.assertEqual(double_metaphone('Richardson'), ('RXRTSN', 'RKRTSN')) |
||
1877 | self.assertEqual(double_metaphone('Ring'), ('RNK', '')) |
||
1878 | self.assertEqual(double_metaphone('Roberts'), ('RPRTS', '')) |
||
1879 | self.assertEqual(double_metaphone('Robertson'), ('RPRTSN', '')) |
||
1880 | self.assertEqual(double_metaphone('Robson'), ('RPSN', '')) |
||
1881 | self.assertEqual(double_metaphone('Rodie'), ('RT', '')) |
||
1882 | self.assertEqual(double_metaphone('Rody'), ('RT', '')) |
||
1883 | self.assertEqual(double_metaphone('Rogers'), ('RKRS', 'RJRS')) |
||
1884 | self.assertEqual(double_metaphone('Ross'), ('RS', '')) |
||
1885 | self.assertEqual(double_metaphone('Rosslevin'), ('RSLFN', '')) |
||
1886 | self.assertEqual(double_metaphone('Rowland'), ('RLNT', '')) |
||
1887 | self.assertEqual(double_metaphone('Ruehl'), ('RL', '')) |
||
1888 | self.assertEqual(double_metaphone('Russell'), ('RSL', '')) |
||
1889 | self.assertEqual(double_metaphone('Ruth'), ('R0', 'RT')) |
||
1890 | self.assertEqual(double_metaphone('Ryan'), ('RN', '')) |
||
1891 | self.assertEqual(double_metaphone('Rysse'), ('RS', '')) |
||
1892 | self.assertEqual(double_metaphone('Sadler'), ('STLR', '')) |
||
1893 | self.assertEqual(double_metaphone('Salmon'), ('SLMN', '')) |
||
1894 | self.assertEqual(double_metaphone('Salter'), ('SLTR', '')) |
||
1895 | self.assertEqual(double_metaphone('Salvatore'), ('SLFTR', '')) |
||
1896 | self.assertEqual(double_metaphone('Sanders'), ('SNTRS', '')) |
||
1897 | self.assertEqual(double_metaphone('Sands'), ('SNTS', '')) |
||
1898 | self.assertEqual(double_metaphone('Sanford'), ('SNFRT', '')) |
||
1899 | self.assertEqual(double_metaphone('Sanger'), ('SNKR', 'SNJR')) |
||
1900 | self.assertEqual(double_metaphone('Sargent'), ('SRJNT', 'SRKNT')) |
||
1901 | self.assertEqual(double_metaphone('Saunders'), ('SNTRS', '')) |
||
1902 | self.assertEqual(double_metaphone('Schilling'), ('XLNK', '')) |
||
1903 | self.assertEqual(double_metaphone('Schlegel'), ('XLKL', 'SLKL')) |
||
1904 | self.assertEqual(double_metaphone('Scott'), ('SKT', '')) |
||
1905 | self.assertEqual(double_metaphone('Sears'), ('SRS', '')) |
||
1906 | self.assertEqual(double_metaphone('Segersall'), ('SJRSL', 'SKRSL')) |
||
1907 | self.assertEqual(double_metaphone('Senecal'), ('SNKL', '')) |
||
1908 | self.assertEqual(double_metaphone('Sergeaux'), ('SRJ', 'SRK')) |
||
1909 | self.assertEqual(double_metaphone('Severance'), ('SFRNS', '')) |
||
1910 | self.assertEqual(double_metaphone('Sharp'), ('XRP', '')) |
||
1911 | self.assertEqual(double_metaphone('Sharpe'), ('XRP', '')) |
||
1912 | self.assertEqual(double_metaphone('Sharply'), ('XRPL', '')) |
||
1913 | self.assertEqual(double_metaphone('Shatswell'), ('XTSL', '')) |
||
1914 | self.assertEqual(double_metaphone('Shattack'), ('XTK', '')) |
||
1915 | self.assertEqual(double_metaphone('Shattock'), ('XTK', '')) |
||
1916 | self.assertEqual(double_metaphone('Shattuck'), ('XTK', '')) |
||
1917 | self.assertEqual(double_metaphone('Shaw'), ('X', 'XF')) |
||
1918 | self.assertEqual(double_metaphone('Sheldon'), ('XLTN', '')) |
||
1919 | self.assertEqual(double_metaphone('Sherman'), ('XRMN', '')) |
||
1920 | self.assertEqual(double_metaphone('Shinn'), ('XN', '')) |
||
1921 | self.assertEqual(double_metaphone('Shirford'), ('XRFRT', '')) |
||
1922 | self.assertEqual(double_metaphone('Shirley'), ('XRL', '')) |
||
1923 | self.assertEqual(double_metaphone('Shively'), ('XFL', '')) |
||
1924 | self.assertEqual(double_metaphone('Shoemaker'), ('XMKR', '')) |
||
1925 | self.assertEqual(double_metaphone('Short'), ('XRT', '')) |
||
1926 | self.assertEqual(double_metaphone('Shotwell'), ('XTL', '')) |
||
1927 | self.assertEqual(double_metaphone('Shute'), ('XT', '')) |
||
1928 | self.assertEqual(double_metaphone('Sibley'), ('SPL', '')) |
||
1929 | self.assertEqual(double_metaphone('Silver'), ('SLFR', '')) |
||
1930 | self.assertEqual(double_metaphone('Simes'), ('SMS', '')) |
||
1931 | self.assertEqual(double_metaphone('Sinken'), ('SNKN', '')) |
||
1932 | self.assertEqual(double_metaphone('Sinn'), ('SN', '')) |
||
1933 | self.assertEqual(double_metaphone('Skelton'), ('SKLTN', '')) |
||
1934 | self.assertEqual(double_metaphone('Skiffe'), ('SKF', '')) |
||
1935 | self.assertEqual(double_metaphone('Skotkonung'), ('SKTKNNK', '')) |
||
1936 | self.assertEqual(double_metaphone('Slade'), ('SLT', 'XLT')) |
||
1937 | self.assertEqual(double_metaphone('Slye'), ('SL', 'XL')) |
||
1938 | self.assertEqual(double_metaphone('Smedley'), ('SMTL', 'XMTL')) |
||
1939 | self.assertEqual(double_metaphone('Smith'), ('SM0', 'XMT')) |
||
1940 | self.assertEqual(double_metaphone('Snow'), ('SN', 'XNF')) |
||
1941 | self.assertEqual(double_metaphone('Soole'), ('SL', '')) |
||
1942 | self.assertEqual(double_metaphone('Soule'), ('SL', '')) |
||
1943 | self.assertEqual(double_metaphone('Southworth'), ('S0R0', 'STRT')) |
||
1944 | self.assertEqual(double_metaphone('Sowles'), ('SLS', '')) |
||
1945 | self.assertEqual(double_metaphone('Spalding'), ('SPLTNK', '')) |
||
1946 | self.assertEqual(double_metaphone('Spark'), ('SPRK', '')) |
||
1947 | self.assertEqual(double_metaphone('Spencer'), ('SPNSR', '')) |
||
1948 | self.assertEqual(double_metaphone('Sperry'), ('SPR', '')) |
||
1949 | self.assertEqual(double_metaphone('Spofford'), ('SPFRT', '')) |
||
1950 | self.assertEqual(double_metaphone('Spooner'), ('SPNR', '')) |
||
1951 | self.assertEqual(double_metaphone('Sprague'), ('SPRK', '')) |
||
1952 | self.assertEqual(double_metaphone('Springer'), ('SPRNKR', 'SPRNJR')) |
||
1953 | self.assertEqual(double_metaphone('St. Clair'), ('STKLR', '')) |
||
1954 | self.assertEqual(double_metaphone('St. Claire'), ('STKLR', '')) |
||
1955 | self.assertEqual(double_metaphone('St. Leger'), ('STLJR', 'STLKR')) |
||
1956 | self.assertEqual(double_metaphone('St. Omer'), ('STMR', '')) |
||
1957 | self.assertEqual(double_metaphone('Stafferton'), ('STFRTN', '')) |
||
1958 | self.assertEqual(double_metaphone('Stafford'), ('STFRT', '')) |
||
1959 | self.assertEqual(double_metaphone('Stalham'), ('STLM', '')) |
||
1960 | self.assertEqual(double_metaphone('Stanford'), ('STNFRT', '')) |
||
1961 | self.assertEqual(double_metaphone('Stanton'), ('STNTN', '')) |
||
1962 | self.assertEqual(double_metaphone('Star'), ('STR', '')) |
||
1963 | self.assertEqual(double_metaphone('Starbuck'), ('STRPK', '')) |
||
1964 | self.assertEqual(double_metaphone('Starkey'), ('STRK', '')) |
||
1965 | self.assertEqual(double_metaphone('Starkweather'), |
||
1966 | ('STRK0R', 'STRKTR')) |
||
1967 | self.assertEqual(double_metaphone('Stearns'), ('STRNS', '')) |
||
1968 | self.assertEqual(double_metaphone('Stebbins'), ('STPNS', '')) |
||
1969 | self.assertEqual(double_metaphone('Steele'), ('STL', '')) |
||
1970 | self.assertEqual(double_metaphone('Stephenson'), ('STFNSN', '')) |
||
1971 | self.assertEqual(double_metaphone('Stevens'), ('STFNS', '')) |
||
1972 | self.assertEqual(double_metaphone('Stoddard'), ('STTRT', '')) |
||
1973 | self.assertEqual(double_metaphone('Stodder'), ('STTR', '')) |
||
1974 | self.assertEqual(double_metaphone('Stone'), ('STN', '')) |
||
1975 | self.assertEqual(double_metaphone('Storey'), ('STR', '')) |
||
1976 | self.assertEqual(double_metaphone('Storrada'), ('STRT', '')) |
||
1977 | self.assertEqual(double_metaphone('Story'), ('STR', '')) |
||
1978 | self.assertEqual(double_metaphone('Stoughton'), ('STFTN', '')) |
||
1979 | self.assertEqual(double_metaphone('Stout'), ('STT', '')) |
||
1980 | self.assertEqual(double_metaphone('Stow'), ('ST', 'STF')) |
||
1981 | self.assertEqual(double_metaphone('Strong'), ('STRNK', '')) |
||
1982 | self.assertEqual(double_metaphone('Strutt'), ('STRT', '')) |
||
1983 | self.assertEqual(double_metaphone('Stryker'), ('STRKR', '')) |
||
1984 | self.assertEqual(double_metaphone('Stuckeley'), ('STKL', '')) |
||
1985 | self.assertEqual(double_metaphone('Sturges'), ('STRJS', 'STRKS')) |
||
1986 | self.assertEqual(double_metaphone('Sturgess'), ('STRJS', 'STRKS')) |
||
1987 | self.assertEqual(double_metaphone('Sturgis'), ('STRJS', 'STRKS')) |
||
1988 | self.assertEqual(double_metaphone('Suevain'), ('SFN', '')) |
||
1989 | self.assertEqual(double_metaphone('Sulyard'), ('SLRT', '')) |
||
1990 | self.assertEqual(double_metaphone('Sutton'), ('STN', '')) |
||
1991 | self.assertEqual(double_metaphone('Swain'), ('SN', 'XN')) |
||
1992 | self.assertEqual(double_metaphone('Swayne'), ('SN', 'XN')) |
||
1993 | self.assertEqual(double_metaphone('Swayze'), ('SS', 'XTS')) |
||
1994 | self.assertEqual(double_metaphone('Swift'), ('SFT', 'XFT')) |
||
1995 | self.assertEqual(double_metaphone('Taber'), ('TPR', '')) |
||
1996 | self.assertEqual(double_metaphone('Talcott'), ('TLKT', '')) |
||
1997 | self.assertEqual(double_metaphone('Tarne'), ('TRN', '')) |
||
1998 | self.assertEqual(double_metaphone('Tatum'), ('TTM', '')) |
||
1999 | self.assertEqual(double_metaphone('Taverner'), ('TFRNR', '')) |
||
2000 | self.assertEqual(double_metaphone('Taylor'), ('TLR', '')) |
||
2001 | self.assertEqual(double_metaphone('Tenney'), ('TN', '')) |
||
2002 | self.assertEqual(double_metaphone('Thayer'), ('0R', 'TR')) |
||
2003 | self.assertEqual(double_metaphone('Thember'), ('0MPR', 'TMPR')) |
||
2004 | self.assertEqual(double_metaphone('Thomas'), ('TMS', '')) |
||
2005 | self.assertEqual(double_metaphone('Thompson'), ('TMPSN', '')) |
||
2006 | self.assertEqual(double_metaphone('Thorne'), ('0RN', 'TRN')) |
||
2007 | self.assertEqual(double_metaphone('Thornycraft'), |
||
2008 | ('0RNKRFT', 'TRNKRFT')) |
||
2009 | self.assertEqual(double_metaphone('Threlkeld'), ('0RLKLT', 'TRLKLT')) |
||
2010 | self.assertEqual(double_metaphone('Throckmorton'), |
||
2011 | ('0RKMRTN', 'TRKMRTN')) |
||
2012 | self.assertEqual(double_metaphone('Thwaits'), ('0TS', 'TTS')) |
||
2013 | self.assertEqual(double_metaphone('Tibbetts'), ('TPTS', '')) |
||
2014 | self.assertEqual(double_metaphone('Tidd'), ('TT', '')) |
||
2015 | self.assertEqual(double_metaphone('Tierney'), ('TRN', '')) |
||
2016 | self.assertEqual(double_metaphone('Tilley'), ('TL', '')) |
||
2017 | self.assertEqual(double_metaphone('Tillieres'), ('TLRS', '')) |
||
2018 | self.assertEqual(double_metaphone('Tilly'), ('TL', '')) |
||
2019 | self.assertEqual(double_metaphone('Tisdale'), ('TSTL', '')) |
||
2020 | self.assertEqual(double_metaphone('Titus'), ('TTS', '')) |
||
2021 | self.assertEqual(double_metaphone('Tobey'), ('TP', '')) |
||
2022 | self.assertEqual(double_metaphone('Tooker'), ('TKR', '')) |
||
2023 | self.assertEqual(double_metaphone('Towle'), ('TL', '')) |
||
2024 | self.assertEqual(double_metaphone('Towne'), ('TN', '')) |
||
2025 | self.assertEqual(double_metaphone('Townsend'), ('TNSNT', '')) |
||
2026 | self.assertEqual(double_metaphone('Treadway'), ('TRT', '')) |
||
2027 | self.assertEqual(double_metaphone('Trelawney'), ('TRLN', '')) |
||
2028 | self.assertEqual(double_metaphone('Trinder'), ('TRNTR', '')) |
||
2029 | self.assertEqual(double_metaphone('Tripp'), ('TRP', '')) |
||
2030 | self.assertEqual(double_metaphone('Trippe'), ('TRP', '')) |
||
2031 | self.assertEqual(double_metaphone('Trott'), ('TRT', '')) |
||
2032 | self.assertEqual(double_metaphone('True'), ('TR', '')) |
||
2033 | self.assertEqual(double_metaphone('Trussebut'), ('TRSPT', '')) |
||
2034 | self.assertEqual(double_metaphone('Tucker'), ('TKR', '')) |
||
2035 | self.assertEqual(double_metaphone('Turgeon'), ('TRJN', 'TRKN')) |
||
2036 | self.assertEqual(double_metaphone('Turner'), ('TRNR', '')) |
||
2037 | self.assertEqual(double_metaphone('Tuttle'), ('TTL', '')) |
||
2038 | self.assertEqual(double_metaphone('Tyler'), ('TLR', '')) |
||
2039 | self.assertEqual(double_metaphone('Tylle'), ('TL', '')) |
||
2040 | self.assertEqual(double_metaphone('Tyrrel'), ('TRL', '')) |
||
2041 | self.assertEqual(double_metaphone('Ua Tuathail'), ('AT0L', 'ATTL')) |
||
2042 | self.assertEqual(double_metaphone('Ulrich'), ('ALRX', 'ALRK')) |
||
2043 | self.assertEqual(double_metaphone('Underhill'), ('ANTRL', '')) |
||
2044 | self.assertEqual(double_metaphone('Underwood'), ('ANTRT', '')) |
||
2045 | self.assertEqual(double_metaphone('Unknown'), ('ANKNN', '')) |
||
2046 | self.assertEqual(double_metaphone('Valentine'), ('FLNTN', '')) |
||
2047 | self.assertEqual(double_metaphone('Van Egmond'), ('FNKMNT', '')) |
||
2048 | self.assertEqual(double_metaphone('Van der Beek'), ('FNTRPK', '')) |
||
2049 | self.assertEqual(double_metaphone('Vaughan'), ('FKN', '')) |
||
2050 | self.assertEqual(double_metaphone('Vermenlen'), ('FRMNLN', '')) |
||
2051 | self.assertEqual(double_metaphone('Vincent'), ('FNSNT', '')) |
||
2052 | self.assertEqual(double_metaphone('Volentine'), ('FLNTN', '')) |
||
2053 | self.assertEqual(double_metaphone('Wagner'), ('AKNR', 'FKNR')) |
||
2054 | self.assertEqual(double_metaphone('Waite'), ('AT', 'FT')) |
||
2055 | self.assertEqual(double_metaphone('Walker'), ('ALKR', 'FLKR')) |
||
2056 | self.assertEqual(double_metaphone('Walter'), ('ALTR', 'FLTR')) |
||
2057 | self.assertEqual(double_metaphone('Wandell'), ('ANTL', 'FNTL')) |
||
2058 | self.assertEqual(double_metaphone('Wandesford'), |
||
2059 | ('ANTSFRT', 'FNTSFRT')) |
||
2060 | self.assertEqual(double_metaphone('Warbleton'), ('ARPLTN', 'FRPLTN')) |
||
2061 | self.assertEqual(double_metaphone('Ward'), ('ART', 'FRT')) |
||
2062 | self.assertEqual(double_metaphone('Warde'), ('ART', 'FRT')) |
||
2063 | self.assertEqual(double_metaphone('Ware'), ('AR', 'FR')) |
||
2064 | self.assertEqual(double_metaphone('Wareham'), ('ARHM', 'FRHM')) |
||
2065 | self.assertEqual(double_metaphone('Warner'), ('ARNR', 'FRNR')) |
||
2066 | self.assertEqual(double_metaphone('Warren'), ('ARN', 'FRN')) |
||
2067 | self.assertEqual(double_metaphone('Washburne'), ('AXPRN', 'FXPRN')) |
||
2068 | self.assertEqual(double_metaphone('Waterbury'), ('ATRPR', 'FTRPR')) |
||
2069 | self.assertEqual(double_metaphone('Watson'), ('ATSN', 'FTSN')) |
||
2070 | self.assertEqual(double_metaphone('WatsonEllithorpe'), |
||
2071 | ('ATSNL0RP', 'FTSNLTRP')) |
||
2072 | self.assertEqual(double_metaphone('Watts'), ('ATS', 'FTS')) |
||
2073 | self.assertEqual(double_metaphone('Wayne'), ('AN', 'FN')) |
||
2074 | self.assertEqual(double_metaphone('Webb'), ('AP', 'FP')) |
||
2075 | self.assertEqual(double_metaphone('Weber'), ('APR', 'FPR')) |
||
2076 | self.assertEqual(double_metaphone('Webster'), ('APSTR', 'FPSTR')) |
||
2077 | self.assertEqual(double_metaphone('Weed'), ('AT', 'FT')) |
||
2078 | self.assertEqual(double_metaphone('Weeks'), ('AKS', 'FKS')) |
||
2079 | self.assertEqual(double_metaphone('Wells'), ('ALS', 'FLS')) |
||
2080 | self.assertEqual(double_metaphone('Wenzell'), ('ANSL', 'FNTSL')) |
||
2081 | self.assertEqual(double_metaphone('West'), ('AST', 'FST')) |
||
2082 | self.assertEqual(double_metaphone('Westbury'), ('ASTPR', 'FSTPR')) |
||
2083 | self.assertEqual(double_metaphone('Whatlocke'), ('ATLK', '')) |
||
2084 | self.assertEqual(double_metaphone('Wheeler'), ('ALR', '')) |
||
2085 | self.assertEqual(double_metaphone('Whiston'), ('ASTN', '')) |
||
2086 | self.assertEqual(double_metaphone('White'), ('AT', '')) |
||
2087 | self.assertEqual(double_metaphone('Whitman'), ('ATMN', '')) |
||
2088 | self.assertEqual(double_metaphone('Whiton'), ('ATN', '')) |
||
2089 | self.assertEqual(double_metaphone('Whitson'), ('ATSN', '')) |
||
2090 | self.assertEqual(double_metaphone('Wickes'), ('AKS', 'FKS')) |
||
2091 | self.assertEqual(double_metaphone('Wilbur'), ('ALPR', 'FLPR')) |
||
2092 | self.assertEqual(double_metaphone('Wilcotes'), ('ALKTS', 'FLKTS')) |
||
2093 | self.assertEqual(double_metaphone('Wilkinson'), ('ALKNSN', 'FLKNSN')) |
||
2094 | self.assertEqual(double_metaphone('Willets'), ('ALTS', 'FLTS')) |
||
2095 | self.assertEqual(double_metaphone('Willett'), ('ALT', 'FLT')) |
||
2096 | self.assertEqual(double_metaphone('Willey'), ('AL', 'FL')) |
||
2097 | self.assertEqual(double_metaphone('Williams'), ('ALMS', 'FLMS')) |
||
2098 | self.assertEqual(double_metaphone('Williston'), ('ALSTN', 'FLSTN')) |
||
2099 | self.assertEqual(double_metaphone('Wilson'), ('ALSN', 'FLSN')) |
||
2100 | self.assertEqual(double_metaphone('Wimes'), ('AMS', 'FMS')) |
||
2101 | self.assertEqual(double_metaphone('Winch'), ('ANX', 'FNK')) |
||
2102 | self.assertEqual(double_metaphone('Winegar'), ('ANKR', 'FNKR')) |
||
2103 | self.assertEqual(double_metaphone('Wing'), ('ANK', 'FNK')) |
||
2104 | self.assertEqual(double_metaphone('Winsley'), ('ANSL', 'FNSL')) |
||
2105 | self.assertEqual(double_metaphone('Winslow'), ('ANSL', 'FNSLF')) |
||
2106 | self.assertEqual(double_metaphone('Winthrop'), ('AN0RP', 'FNTRP')) |
||
2107 | self.assertEqual(double_metaphone('Wise'), ('AS', 'FS')) |
||
2108 | self.assertEqual(double_metaphone('Wood'), ('AT', 'FT')) |
||
2109 | self.assertEqual(double_metaphone('Woodbridge'), ('ATPRJ', 'FTPRJ')) |
||
2110 | self.assertEqual(double_metaphone('Woodward'), ('ATRT', 'FTRT')) |
||
2111 | self.assertEqual(double_metaphone('Wooley'), ('AL', 'FL')) |
||
2112 | self.assertEqual(double_metaphone('Woolley'), ('AL', 'FL')) |
||
2113 | self.assertEqual(double_metaphone('Worth'), ('AR0', 'FRT')) |
||
2114 | self.assertEqual(double_metaphone('Worthen'), ('AR0N', 'FRTN')) |
||
2115 | self.assertEqual(double_metaphone('Worthley'), ('AR0L', 'FRTL')) |
||
2116 | self.assertEqual(double_metaphone('Wright'), ('RT', '')) |
||
2117 | self.assertEqual(double_metaphone('Wyer'), ('AR', 'FR')) |
||
2118 | self.assertEqual(double_metaphone('Wyere'), ('AR', 'FR')) |
||
2119 | self.assertEqual(double_metaphone('Wynkoop'), ('ANKP', 'FNKP')) |
||
2120 | self.assertEqual(double_metaphone('Yarnall'), ('ARNL', '')) |
||
2121 | self.assertEqual(double_metaphone('Yeoman'), ('AMN', '')) |
||
2122 | self.assertEqual(double_metaphone('Yorke'), ('ARK', '')) |
||
2123 | self.assertEqual(double_metaphone('Young'), ('ANK', '')) |
||
2124 | self.assertEqual(double_metaphone('ab Wennonwen'), ('APNNN', '')) |
||
2125 | self.assertEqual(double_metaphone('ap Llewellyn'), ('APLLN', '')) |
||
2126 | self.assertEqual(double_metaphone('ap Lorwerth'), ('APLRR0', 'APLRRT')) |
||
2127 | self.assertEqual(double_metaphone('d\'Angouleme'), ('TNKLM', '')) |
||
2128 | self.assertEqual(double_metaphone('de Audeham'), ('TTHM', '')) |
||
2129 | self.assertEqual(double_metaphone('de Bavant'), ('TPFNT', '')) |
||
2130 | self.assertEqual(double_metaphone('de Beauchamp'), ('TPXMP', 'TPKMP')) |
||
2131 | self.assertEqual(double_metaphone('de Beaumont'), ('TPMNT', '')) |
||
2132 | self.assertEqual(double_metaphone('de Bolbec'), ('TPLPK', '')) |
||
2133 | self.assertEqual(double_metaphone('de Braiose'), ('TPRS', '')) |
||
2134 | self.assertEqual(double_metaphone('de Braose'), ('TPRS', '')) |
||
2135 | self.assertEqual(double_metaphone('de Briwere'), ('TPRR', '')) |
||
2136 | self.assertEqual(double_metaphone('de Cantelou'), ('TKNTL', '')) |
||
2137 | self.assertEqual(double_metaphone('de Cherelton'), |
||
2138 | ('TXRLTN', 'TKRLTN')) |
||
2139 | self.assertEqual(double_metaphone('de Cherleton'), |
||
2140 | ('TXRLTN', 'TKRLTN')) |
||
2141 | self.assertEqual(double_metaphone('de Clare'), ('TKLR', '')) |
||
2142 | self.assertEqual(double_metaphone('de Claremont'), ('TKLRMNT', '')) |
||
2143 | self.assertEqual(double_metaphone('de Clifford'), ('TKLFRT', '')) |
||
2144 | self.assertEqual(double_metaphone('de Colville'), ('TKLFL', '')) |
||
2145 | self.assertEqual(double_metaphone('de Courtenay'), ('TKRTN', '')) |
||
2146 | self.assertEqual(double_metaphone('de Fauconberg'), ('TFKNPRK', '')) |
||
2147 | self.assertEqual(double_metaphone('de Forest'), ('TFRST', '')) |
||
2148 | self.assertEqual(double_metaphone('de Gai'), ('TK', '')) |
||
2149 | self.assertEqual(double_metaphone('de Grey'), ('TKR', '')) |
||
2150 | self.assertEqual(double_metaphone('de Guernons'), ('TKRNNS', '')) |
||
2151 | self.assertEqual(double_metaphone('de Haia'), ('T', '')) |
||
2152 | self.assertEqual(double_metaphone('de Harcourt'), ('TRKRT', '')) |
||
2153 | self.assertEqual(double_metaphone('de Hastings'), ('TSTNKS', '')) |
||
2154 | self.assertEqual(double_metaphone('de Hoke'), ('TK', '')) |
||
2155 | self.assertEqual(double_metaphone('de Hooch'), ('TK', '')) |
||
2156 | self.assertEqual(double_metaphone('de Hugelville'), ('TJLFL', 'TKLFL')) |
||
2157 | self.assertEqual(double_metaphone('de Huntingdon'), ('TNTNKTN', '')) |
||
2158 | self.assertEqual(double_metaphone('de Insula'), ('TNSL', '')) |
||
2159 | self.assertEqual(double_metaphone('de Keynes'), ('TKNS', '')) |
||
2160 | self.assertEqual(double_metaphone('de Lacy'), ('TLS', '')) |
||
2161 | self.assertEqual(double_metaphone('de Lexington'), ('TLKSNKTN', '')) |
||
2162 | self.assertEqual(double_metaphone('de Lusignan'), ('TLSNN', 'TLSKNN')) |
||
2163 | self.assertEqual(double_metaphone('de Manvers'), ('TMNFRS', '')) |
||
2164 | self.assertEqual(double_metaphone('de Montagu'), ('TMNTK', '')) |
||
2165 | self.assertEqual(double_metaphone('de Montault'), ('TMNTLT', '')) |
||
2166 | self.assertEqual(double_metaphone('de Montfort'), ('TMNTFRT', '')) |
||
2167 | self.assertEqual(double_metaphone('de Mortimer'), ('TMRTMR', '')) |
||
2168 | self.assertEqual(double_metaphone('de Morville'), ('TMRFL', '')) |
||
2169 | self.assertEqual(double_metaphone('de Morvois'), ('TMRF', 'TMRFS')) |
||
2170 | self.assertEqual(double_metaphone('de Neufmarche'), |
||
2171 | ('TNFMRX', 'TNFMRK')) |
||
2172 | self.assertEqual(double_metaphone('de Odingsells'), ('TTNKSLS', '')) |
||
2173 | self.assertEqual(double_metaphone('de Odyngsells'), ('TTNKSLS', '')) |
||
2174 | self.assertEqual(double_metaphone('de Percy'), ('TPRS', '')) |
||
2175 | self.assertEqual(double_metaphone('de Pierrepont'), ('TPRPNT', '')) |
||
2176 | self.assertEqual(double_metaphone('de Plessetis'), ('TPLSTS', '')) |
||
2177 | self.assertEqual(double_metaphone('de Porhoet'), ('TPRT', '')) |
||
2178 | self.assertEqual(double_metaphone('de Prouz'), ('TPRS', '')) |
||
2179 | self.assertEqual(double_metaphone('de Quincy'), ('TKNS', '')) |
||
2180 | self.assertEqual(double_metaphone('de Ripellis'), ('TRPLS', '')) |
||
2181 | self.assertEqual(double_metaphone('de Ros'), ('TRS', '')) |
||
2182 | self.assertEqual(double_metaphone('de Salisbury'), ('TSLSPR', '')) |
||
2183 | self.assertEqual(double_metaphone('de Sanford'), ('TSNFRT', '')) |
||
2184 | self.assertEqual(double_metaphone('de Somery'), ('TSMR', '')) |
||
2185 | self.assertEqual(double_metaphone('de St. Hilary'), ('TSTLR', '')) |
||
2186 | self.assertEqual(double_metaphone('de St. Liz'), ('TSTLS', '')) |
||
2187 | self.assertEqual(double_metaphone('de Sutton'), ('TSTN', '')) |
||
2188 | self.assertEqual(double_metaphone('de Toeni'), ('TTN', '')) |
||
2189 | self.assertEqual(double_metaphone('de Tony'), ('TTN', '')) |
||
2190 | self.assertEqual(double_metaphone('de Umfreville'), ('TMFRFL', '')) |
||
2191 | self.assertEqual(double_metaphone('de Valognes'), ('TFLNS', 'TFLKNS')) |
||
2192 | self.assertEqual(double_metaphone('de Vaux'), ('TF', '')) |
||
2193 | self.assertEqual(double_metaphone('de Vere'), ('TFR', '')) |
||
2194 | self.assertEqual(double_metaphone('de Vermandois'), |
||
2195 | ('TFRMNT', 'TFRMNTS')) |
||
2196 | self.assertEqual(double_metaphone('de Vernon'), ('TFRNN', '')) |
||
2197 | self.assertEqual(double_metaphone('de Vexin'), ('TFKSN', '')) |
||
2198 | self.assertEqual(double_metaphone('de Vitre'), ('TFTR', '')) |
||
2199 | self.assertEqual(double_metaphone('de Wandesford'), ('TNTSFRT', '')) |
||
2200 | self.assertEqual(double_metaphone('de Warenne'), ('TRN', '')) |
||
2201 | self.assertEqual(double_metaphone('de Westbury'), ('TSTPR', '')) |
||
2202 | self.assertEqual(double_metaphone('di Saluzzo'), ('TSLS', 'TSLTS')) |
||
2203 | self.assertEqual(double_metaphone('fitz Alan'), ('FTSLN', '')) |
||
2204 | self.assertEqual(double_metaphone('fitz Geoffrey'), |
||
2205 | ('FTSJFR', 'FTSKFR')) |
||
2206 | self.assertEqual(double_metaphone('fitz Herbert'), ('FTSRPRT', '')) |
||
2207 | self.assertEqual(double_metaphone('fitz John'), ('FTSJN', '')) |
||
2208 | self.assertEqual(double_metaphone('fitz Patrick'), ('FTSPTRK', '')) |
||
2209 | self.assertEqual(double_metaphone('fitz Payn'), ('FTSPN', '')) |
||
2210 | self.assertEqual(double_metaphone('fitz Piers'), ('FTSPRS', '')) |
||
2211 | self.assertEqual(double_metaphone('fitz Randolph'), ('FTSRNTLF', '')) |
||
2212 | self.assertEqual(double_metaphone('fitz Richard'), |
||
2213 | ('FTSRXRT', 'FTSRKRT')) |
||
2214 | self.assertEqual(double_metaphone('fitz Robert'), ('FTSRPRT', '')) |
||
2215 | self.assertEqual(double_metaphone('fitz Roy'), ('FTSR', '')) |
||
2216 | self.assertEqual(double_metaphone('fitz Scrob'), ('FTSSKP', '')) |
||
2217 | self.assertEqual(double_metaphone('fitz Walter'), ('FTSLTR', '')) |
||
2218 | self.assertEqual(double_metaphone('fitz Warin'), ('FTSRN', '')) |
||
2219 | self.assertEqual(double_metaphone('fitz Williams'), ('FTSLMS', '')) |
||
2220 | self.assertEqual(double_metaphone('la Zouche'), ('LSX', 'LSK')) |
||
2221 | self.assertEqual(double_metaphone('le Botiller'), ('LPTLR', '')) |
||
2222 | self.assertEqual(double_metaphone('le Despenser'), ('LTSPNSR', '')) |
||
2223 | self.assertEqual(double_metaphone('le deSpencer'), ('LTSPNSR', '')) |
||
2224 | self.assertEqual(double_metaphone('of Allendale'), ('AFLNTL', '')) |
||
2225 | self.assertEqual(double_metaphone('of Angouleme'), ('AFNKLM', '')) |
||
2226 | self.assertEqual(double_metaphone('of Anjou'), ('AFNJ', '')) |
||
2227 | self.assertEqual(double_metaphone('of Aquitaine'), ('AFKTN', '')) |
||
2228 | self.assertEqual(double_metaphone('of Aumale'), ('AFML', '')) |
||
2229 | self.assertEqual(double_metaphone('of Bavaria'), ('AFPFR', '')) |
||
2230 | self.assertEqual(double_metaphone('of Boulogne'), ('AFPLN', 'AFPLKN')) |
||
2231 | self.assertEqual(double_metaphone('of Brittany'), ('AFPRTN', '')) |
||
2232 | self.assertEqual(double_metaphone('of Brittary'), ('AFPRTR', '')) |
||
2233 | self.assertEqual(double_metaphone('of Castile'), ('AFKSTL', '')) |
||
2234 | self.assertEqual(double_metaphone('of Chester'), ('AFXSTR', 'AFKSTR')) |
||
2235 | self.assertEqual(double_metaphone('of Clermont'), ('AFKLRMNT', '')) |
||
2236 | self.assertEqual(double_metaphone('of Cologne'), ('AFKLN', 'AFKLKN')) |
||
2237 | self.assertEqual(double_metaphone('of Dinan'), ('AFTNN', '')) |
||
2238 | self.assertEqual(double_metaphone('of Dunbar'), ('AFTNPR', '')) |
||
2239 | self.assertEqual(double_metaphone('of England'), ('AFNKLNT', '')) |
||
2240 | self.assertEqual(double_metaphone('of Essex'), ('AFSKS', '')) |
||
2241 | self.assertEqual(double_metaphone('of Falaise'), ('AFFLS', '')) |
||
2242 | self.assertEqual(double_metaphone('of Flanders'), ('AFFLNTRS', '')) |
||
2243 | self.assertEqual(double_metaphone('of Galloway'), ('AFKL', '')) |
||
2244 | self.assertEqual(double_metaphone('of Germany'), ('AFKRMN', 'AFJRMN')) |
||
2245 | self.assertEqual(double_metaphone('of Gloucester'), ('AFKLSSTR', '')) |
||
2246 | self.assertEqual(double_metaphone('of Heristal'), ('AFRSTL', '')) |
||
2247 | self.assertEqual(double_metaphone('of Hungary'), ('AFNKR', '')) |
||
2248 | self.assertEqual(double_metaphone('of Huntington'), ('AFNTNKTN', '')) |
||
2249 | self.assertEqual(double_metaphone('of Kiev'), ('AFKF', '')) |
||
2250 | self.assertEqual(double_metaphone('of Kuno'), ('AFKN', '')) |
||
2251 | self.assertEqual(double_metaphone('of Landen'), ('AFLNTN', '')) |
||
2252 | self.assertEqual(double_metaphone('of Laon'), ('AFLN', '')) |
||
2253 | self.assertEqual(double_metaphone('of Leinster'), ('AFLNSTR', '')) |
||
2254 | self.assertEqual(double_metaphone('of Lens'), ('AFLNS', '')) |
||
2255 | self.assertEqual(double_metaphone('of Lorraine'), ('AFLRN', '')) |
||
2256 | self.assertEqual(double_metaphone('of Louvain'), ('AFLFN', '')) |
||
2257 | self.assertEqual(double_metaphone('of Mercia'), ('AFMRS', 'AFMRX')) |
||
2258 | self.assertEqual(double_metaphone('of Metz'), ('AFMTS', '')) |
||
2259 | self.assertEqual(double_metaphone('of Meulan'), ('AFMLN', '')) |
||
2260 | self.assertEqual(double_metaphone('of Nass'), ('AFNS', '')) |
||
2261 | self.assertEqual(double_metaphone('of Normandy'), ('AFNRMNT', '')) |
||
2262 | self.assertEqual(double_metaphone('of Ohningen'), ('AFNNJN', 'AFNNKN')) |
||
2263 | self.assertEqual(double_metaphone('of Orleans'), ('AFRLNS', '')) |
||
2264 | self.assertEqual(double_metaphone('of Poitou'), ('AFPT', '')) |
||
2265 | self.assertEqual(double_metaphone('of Polotzk'), ('AFPLTSK', '')) |
||
2266 | self.assertEqual(double_metaphone('of Provence'), ('AFPRFNS', '')) |
||
2267 | self.assertEqual(double_metaphone('of Ringelheim'), |
||
2268 | ('AFRNJLM', 'AFRNKLM')) |
||
2269 | self.assertEqual(double_metaphone('of Salisbury'), ('AFSLSPR', '')) |
||
2270 | self.assertEqual(double_metaphone('of Saxony'), ('AFSKSN', '')) |
||
2271 | self.assertEqual(double_metaphone('of Scotland'), ('AFSKTLNT', '')) |
||
2272 | self.assertEqual(double_metaphone('of Senlis'), ('AFSNLS', '')) |
||
2273 | self.assertEqual(double_metaphone('of Stafford'), ('AFSTFRT', '')) |
||
2274 | self.assertEqual(double_metaphone('of Swabia'), ('AFSP', '')) |
||
2275 | self.assertEqual(double_metaphone('of Tongres'), ('AFTNKRS', '')) |
||
2276 | self.assertEqual(double_metaphone('of the Tributes'), |
||
2277 | ('AF0TRPTS', 'AFTTRPTS')) |
||
2278 | self.assertEqual(double_metaphone('unknown'), ('ANKNN', '')) |
||
2279 | self.assertEqual(double_metaphone('van der Gouda'), ('FNTRKT', '')) |
||
2280 | self.assertEqual(double_metaphone('von Adenbaugh'), ('FNTNP', '')) |
||
2281 | self.assertEqual(double_metaphone('ARCHITure'), ('ARKTR', '')) |
||
2282 | self.assertEqual(double_metaphone('Arnoff'), ('ARNF', '')) |
||
2283 | self.assertEqual(double_metaphone('Arnow'), ('ARN', 'ARNF')) |
||
2284 | self.assertEqual(double_metaphone('DANGER'), ('TNJR', 'TNKR')) |
||
2285 | self.assertEqual(double_metaphone('Jankelowicz'), ('JNKLTS', 'ANKLFX')) |
||
2286 | self.assertEqual(double_metaphone('MANGER'), ('MNJR', 'MNKR')) |
||
2287 | self.assertEqual(double_metaphone('McClellan'), ('MKLLN', '')) |
||
2288 | self.assertEqual(double_metaphone('McHugh'), ('MK', '')) |
||
2289 | self.assertEqual(double_metaphone('McLaughlin'), ('MKLFLN', '')) |
||
2290 | self.assertEqual(double_metaphone('ORCHEStra'), ('ARKSTR', '')) |
||
2291 | self.assertEqual(double_metaphone('ORCHID'), ('ARKT', '')) |
||
2292 | self.assertEqual(double_metaphone('Pierce'), ('PRS', '')) |
||
2293 | self.assertEqual(double_metaphone('RANGER'), ('RNJR', 'RNKR')) |
||
2294 | self.assertEqual(double_metaphone('Schlesinger'), ('XLSNKR', 'SLSNJR')) |
||
2295 | self.assertEqual(double_metaphone('Uomo'), ('AM', '')) |
||
2296 | self.assertEqual(double_metaphone('Vasserman'), ('FSRMN', '')) |
||
2297 | self.assertEqual(double_metaphone('Wasserman'), ('ASRMN', 'FSRMN')) |
||
2298 | self.assertEqual(double_metaphone('Womo'), ('AM', 'FM')) |
||
2299 | self.assertEqual(double_metaphone('Yankelovich'), ('ANKLFX', 'ANKLFK')) |
||
2300 | self.assertEqual(double_metaphone('accede'), ('AKST', '')) |
||
2301 | self.assertEqual(double_metaphone('accident'), ('AKSTNT', '')) |
||
2302 | self.assertEqual(double_metaphone('adelsheim'), ('ATLSM', '')) |
||
2303 | self.assertEqual(double_metaphone('aged'), ('AJT', 'AKT')) |
||
2304 | self.assertEqual(double_metaphone('ageless'), ('AJLS', 'AKLS')) |
||
2305 | self.assertEqual(double_metaphone('agency'), ('AJNS', 'AKNS')) |
||
2306 | self.assertEqual(double_metaphone('aghast'), ('AKST', '')) |
||
2307 | self.assertEqual(double_metaphone('agio'), ('AJ', 'AK')) |
||
2308 | self.assertEqual(double_metaphone('agrimony'), ('AKRMN', '')) |
||
2309 | self.assertEqual(double_metaphone('album'), ('ALPM', '')) |
||
2310 | self.assertEqual(double_metaphone('alcmene'), ('ALKMN', '')) |
||
2311 | self.assertEqual(double_metaphone('alehouse'), ('ALHS', '')) |
||
2312 | self.assertEqual(double_metaphone('antique'), ('ANTK', '')) |
||
2313 | self.assertEqual(double_metaphone('artois'), ('ART', 'ARTS')) |
||
2314 | self.assertEqual(double_metaphone('automation'), ('ATMXN', '')) |
||
2315 | self.assertEqual(double_metaphone('bacchus'), ('PKS', '')) |
||
2316 | self.assertEqual(double_metaphone('bacci'), ('PX', '')) |
||
2317 | self.assertEqual(double_metaphone('bajador'), ('PJTR', 'PHTR')) |
||
2318 | self.assertEqual(double_metaphone('bellocchio'), ('PLX', '')) |
||
2319 | self.assertEqual(double_metaphone('bertucci'), ('PRTX', '')) |
||
2320 | self.assertEqual(double_metaphone('biaggi'), ('PJ', 'PK')) |
||
2321 | self.assertEqual(double_metaphone('bough'), ('P', '')) |
||
2322 | self.assertEqual(double_metaphone('breaux'), ('PR', '')) |
||
2323 | self.assertEqual(double_metaphone('broughton'), ('PRTN', '')) |
||
2324 | self.assertEqual(double_metaphone('cabrillo'), ('KPRL', 'KPR')) |
||
2325 | self.assertEqual(double_metaphone('caesar'), ('SSR', '')) |
||
2326 | self.assertEqual(double_metaphone('cagney'), ('KKN', '')) |
||
2327 | self.assertEqual(double_metaphone('campbell'), ('KMPL', '')) |
||
2328 | self.assertEqual(double_metaphone('carlisle'), ('KRLL', '')) |
||
2329 | self.assertEqual(double_metaphone('carlysle'), ('KRLL', '')) |
||
2330 | self.assertEqual(double_metaphone('chemistry'), ('KMSTR', '')) |
||
2331 | self.assertEqual(double_metaphone('chianti'), ('KNT', '')) |
||
2332 | self.assertEqual(double_metaphone('chorus'), ('KRS', '')) |
||
2333 | self.assertEqual(double_metaphone('cough'), ('KF', '')) |
||
2334 | self.assertEqual(double_metaphone('czerny'), ('SRN', 'XRN')) |
||
2335 | self.assertEqual(double_metaphone('deffenbacher'), ('TFNPKR', '')) |
||
2336 | self.assertEqual(double_metaphone('dumb'), ('TM', '')) |
||
2337 | self.assertEqual(double_metaphone('edgar'), ('ATKR', '')) |
||
2338 | self.assertEqual(double_metaphone('edge'), ('AJ', '')) |
||
2339 | self.assertEqual(double_metaphone('filipowicz'), ('FLPTS', 'FLPFX')) |
||
2340 | self.assertEqual(double_metaphone('focaccia'), ('FKX', '')) |
||
2341 | self.assertEqual(double_metaphone('gallegos'), ('KLKS', 'KKS')) |
||
2342 | self.assertEqual(double_metaphone('gambrelli'), ('KMPRL', '')) |
||
2343 | self.assertEqual(double_metaphone('geithain'), ('K0N', 'JTN')) |
||
2344 | self.assertEqual(double_metaphone('ghiradelli'), ('JRTL', '')) |
||
2345 | self.assertEqual(double_metaphone('ghislane'), ('JLN', '')) |
||
2346 | self.assertEqual(double_metaphone('gough'), ('KF', '')) |
||
2347 | self.assertEqual(double_metaphone('hartheim'), ('HR0M', 'HRTM')) |
||
2348 | self.assertEqual(double_metaphone('heimsheim'), ('HMSM', '')) |
||
2349 | self.assertEqual(double_metaphone('hochmeier'), ('HKMR', '')) |
||
2350 | self.assertEqual(double_metaphone('hugh'), ('H', '')) |
||
2351 | self.assertEqual(double_metaphone('hunger'), ('HNKR', 'HNJR')) |
||
2352 | self.assertEqual(double_metaphone('hungry'), ('HNKR', '')) |
||
2353 | self.assertEqual(double_metaphone('island'), ('ALNT', '')) |
||
2354 | self.assertEqual(double_metaphone('isle'), ('AL', '')) |
||
2355 | self.assertEqual(double_metaphone('jose'), ('HS', '')) |
||
2356 | self.assertEqual(double_metaphone('laugh'), ('LF', '')) |
||
2357 | self.assertEqual(double_metaphone('mac caffrey'), ('MKFR', '')) |
||
2358 | self.assertEqual(double_metaphone('mac gregor'), ('MKRKR', '')) |
||
2359 | self.assertEqual(double_metaphone('pegnitz'), ('PNTS', 'PKNTS')) |
||
2360 | self.assertEqual(double_metaphone('piskowitz'), ('PSKTS', 'PSKFX')) |
||
2361 | self.assertEqual(double_metaphone('queen'), ('KN', '')) |
||
2362 | self.assertEqual(double_metaphone('raspberry'), ('RSPR', '')) |
||
2363 | self.assertEqual(double_metaphone('resnais'), ('RSN', 'RSNS')) |
||
2364 | self.assertEqual(double_metaphone('rogier'), ('RJ', 'RJR')) |
||
2365 | self.assertEqual(double_metaphone('rough'), ('RF', '')) |
||
2366 | self.assertEqual(double_metaphone('san jacinto'), ('SNHSNT', '')) |
||
2367 | self.assertEqual(double_metaphone('schenker'), ('XNKR', 'SKNKR')) |
||
2368 | self.assertEqual(double_metaphone('schermerhorn'), |
||
2369 | ('XRMRRN', 'SKRMRRN')) |
||
2370 | self.assertEqual(double_metaphone('schmidt'), ('XMT', 'SMT')) |
||
2371 | self.assertEqual(double_metaphone('schneider'), ('XNTR', 'SNTR')) |
||
2372 | self.assertEqual(double_metaphone('school'), ('SKL', '')) |
||
2373 | self.assertEqual(double_metaphone('schooner'), ('SKNR', '')) |
||
2374 | self.assertEqual(double_metaphone('schrozberg'), ('XRSPRK', 'SRSPRK')) |
||
2375 | self.assertEqual(double_metaphone('schulman'), ('XLMN', '')) |
||
2376 | self.assertEqual(double_metaphone('schwabach'), ('XPK', 'XFPK')) |
||
2377 | self.assertEqual(double_metaphone('schwarzach'), ('XRSK', 'XFRTSK')) |
||
2378 | self.assertEqual(double_metaphone('smith'), ('SM0', 'XMT')) |
||
2379 | self.assertEqual(double_metaphone('snider'), ('SNTR', 'XNTR')) |
||
2380 | self.assertEqual(double_metaphone('succeed'), ('SKST', '')) |
||
2381 | self.assertEqual(double_metaphone('sugarcane'), ('XKRKN', 'SKRKN')) |
||
2382 | self.assertEqual(double_metaphone('svobodka'), ('SFPTK', '')) |
||
2383 | self.assertEqual(double_metaphone('tagliaro'), ('TKLR', 'TLR')) |
||
2384 | self.assertEqual(double_metaphone('thames'), ('TMS', '')) |
||
2385 | self.assertEqual(double_metaphone('theilheim'), ('0LM', 'TLM')) |
||
2386 | self.assertEqual(double_metaphone('thomas'), ('TMS', '')) |
||
2387 | self.assertEqual(double_metaphone('thumb'), ('0M', 'TM')) |
||
2388 | self.assertEqual(double_metaphone('tichner'), ('TXNR', 'TKNR')) |
||
2389 | self.assertEqual(double_metaphone('tough'), ('TF', '')) |
||
2390 | self.assertEqual(double_metaphone('umbrella'), ('AMPRL', '')) |
||
2391 | self.assertEqual(double_metaphone('vilshofen'), ('FLXFN', '')) |
||
2392 | self.assertEqual(double_metaphone('von schuller'), ('FNXLR', '')) |
||
2393 | self.assertEqual(double_metaphone('wachtler'), ('AKTLR', 'FKTLR')) |
||
2394 | self.assertEqual(double_metaphone('wechsler'), ('AKSLR', 'FKSLR')) |
||
2395 | self.assertEqual(double_metaphone('weikersheim'), ('AKRSM', 'FKRSM')) |
||
2396 | self.assertEqual(double_metaphone('zhao'), ('J', '')) |
||
2397 | |||
2398 | def test_double_metaphone_surnames4(self): |
||
2399 | """Test abydos.phonetic.double_metaphone (surname data, 4-letter).""" |
||
2400 | self.assertEqual(double_metaphone('', 4), ('', '')) |
||
2401 | self.assertEqual(double_metaphone('ALLERTON', 4), ('ALRT', '')) |
||
2402 | self.assertEqual(double_metaphone('Acton', 4), ('AKTN', '')) |
||
2403 | self.assertEqual(double_metaphone('Adams', 4), ('ATMS', '')) |
||
2404 | self.assertEqual(double_metaphone('Aggar', 4), ('AKR', '')) |
||
2405 | self.assertEqual(double_metaphone('Ahl', 4), ('AL', '')) |
||
2406 | self.assertEqual(double_metaphone('Aiken', 4), ('AKN', '')) |
||
2407 | self.assertEqual(double_metaphone('Alan', 4), ('ALN', '')) |
||
2408 | self.assertEqual(double_metaphone('Alcock', 4), ('ALKK', '')) |
||
2409 | self.assertEqual(double_metaphone('Alden', 4), ('ALTN', '')) |
||
2410 | self.assertEqual(double_metaphone('Aldham', 4), ('ALTM', '')) |
||
2411 | self.assertEqual(double_metaphone('Allen', 4), ('ALN', '')) |
||
2412 | self.assertEqual(double_metaphone('Allerton', 4), ('ALRT', '')) |
||
2413 | self.assertEqual(double_metaphone('Alsop', 4), ('ALSP', '')) |
||
2414 | self.assertEqual(double_metaphone('Alwein', 4), ('ALN', '')) |
||
2415 | self.assertEqual(double_metaphone('Ambler', 4), ('AMPL', '')) |
||
2416 | self.assertEqual(double_metaphone('Andevill', 4), ('ANTF', '')) |
||
2417 | self.assertEqual(double_metaphone('Andrews', 4), ('ANTR', '')) |
||
2418 | self.assertEqual(double_metaphone('Andreyco', 4), ('ANTR', '')) |
||
2419 | self.assertEqual(double_metaphone('Andriesse', 4), ('ANTR', '')) |
||
2420 | self.assertEqual(double_metaphone('Angier', 4), ('ANJ', 'ANJR')) |
||
2421 | self.assertEqual(double_metaphone('Annabel', 4), ('ANPL', '')) |
||
2422 | self.assertEqual(double_metaphone('Anne', 4), ('AN', '')) |
||
2423 | self.assertEqual(double_metaphone('Anstye', 4), ('ANST', '')) |
||
2424 | self.assertEqual(double_metaphone('Appling', 4), ('APLN', '')) |
||
2425 | self.assertEqual(double_metaphone('Apuke', 4), ('APK', '')) |
||
2426 | self.assertEqual(double_metaphone('Arnold', 4), ('ARNL', '')) |
||
2427 | self.assertEqual(double_metaphone('Ashby', 4), ('AXP', '')) |
||
2428 | self.assertEqual(double_metaphone('Astwood', 4), ('ASTT', '')) |
||
2429 | self.assertEqual(double_metaphone('Atkinson', 4), ('ATKN', '')) |
||
2430 | self.assertEqual(double_metaphone('Audley', 4), ('ATL', '')) |
||
2431 | self.assertEqual(double_metaphone('Austin', 4), ('ASTN', '')) |
||
2432 | self.assertEqual(double_metaphone('Avenal', 4), ('AFNL', '')) |
||
2433 | self.assertEqual(double_metaphone('Ayer', 4), ('AR', '')) |
||
2434 | self.assertEqual(double_metaphone('Ayot', 4), ('AT', '')) |
||
2435 | self.assertEqual(double_metaphone('Babbitt', 4), ('PPT', '')) |
||
2436 | self.assertEqual(double_metaphone('Bachelor', 4), ('PXLR', 'PKLR')) |
||
2437 | self.assertEqual(double_metaphone('Bachelour', 4), ('PXLR', 'PKLR')) |
||
2438 | self.assertEqual(double_metaphone('Bailey', 4), ('PL', '')) |
||
2439 | self.assertEqual(double_metaphone('Baivel', 4), ('PFL', '')) |
||
2440 | self.assertEqual(double_metaphone('Baker', 4), ('PKR', '')) |
||
2441 | self.assertEqual(double_metaphone('Baldwin', 4), ('PLTN', '')) |
||
2442 | self.assertEqual(double_metaphone('Balsley', 4), ('PLSL', '')) |
||
2443 | self.assertEqual(double_metaphone('Barber', 4), ('PRPR', '')) |
||
2444 | self.assertEqual(double_metaphone('Barker', 4), ('PRKR', '')) |
||
2445 | self.assertEqual(double_metaphone('Barlow', 4), ('PRL', 'PRLF')) |
||
2446 | self.assertEqual(double_metaphone('Barnard', 4), ('PRNR', '')) |
||
2447 | self.assertEqual(double_metaphone('Barnes', 4), ('PRNS', '')) |
||
2448 | self.assertEqual(double_metaphone('Barnsley', 4), ('PRNS', '')) |
||
2449 | self.assertEqual(double_metaphone('Barouxis', 4), ('PRKS', '')) |
||
2450 | self.assertEqual(double_metaphone('Bartlet', 4), ('PRTL', '')) |
||
2451 | self.assertEqual(double_metaphone('Basley', 4), ('PSL', '')) |
||
2452 | self.assertEqual(double_metaphone('Basset', 4), ('PST', '')) |
||
2453 | self.assertEqual(double_metaphone('Bassett', 4), ('PST', '')) |
||
2454 | self.assertEqual(double_metaphone('Batchlor', 4), ('PXLR', '')) |
||
2455 | self.assertEqual(double_metaphone('Bates', 4), ('PTS', '')) |
||
2456 | self.assertEqual(double_metaphone('Batson', 4), ('PTSN', '')) |
||
2457 | self.assertEqual(double_metaphone('Bayes', 4), ('PS', '')) |
||
2458 | self.assertEqual(double_metaphone('Bayley', 4), ('PL', '')) |
||
2459 | self.assertEqual(double_metaphone('Beale', 4), ('PL', '')) |
||
2460 | self.assertEqual(double_metaphone('Beauchamp', 4), ('PXMP', 'PKMP')) |
||
2461 | self.assertEqual(double_metaphone('Beauclerc', 4), ('PKLR', '')) |
||
2462 | self.assertEqual(double_metaphone('Beech', 4), ('PK', '')) |
||
2463 | self.assertEqual(double_metaphone('Beers', 4), ('PRS', '')) |
||
2464 | self.assertEqual(double_metaphone('Beke', 4), ('PK', '')) |
||
2465 | self.assertEqual(double_metaphone('Belcher', 4), ('PLXR', 'PLKR')) |
||
2466 | self.assertEqual(double_metaphone('Benjamin', 4), ('PNJM', '')) |
||
2467 | self.assertEqual(double_metaphone('Benningham', 4), ('PNNK', '')) |
||
2468 | self.assertEqual(double_metaphone('Bereford', 4), ('PRFR', '')) |
||
2469 | self.assertEqual(double_metaphone('Bergen', 4), ('PRJN', 'PRKN')) |
||
2470 | self.assertEqual(double_metaphone('Berkeley', 4), ('PRKL', '')) |
||
2471 | self.assertEqual(double_metaphone('Berry', 4), ('PR', '')) |
||
2472 | self.assertEqual(double_metaphone('Besse', 4), ('PS', '')) |
||
2473 | self.assertEqual(double_metaphone('Bessey', 4), ('PS', '')) |
||
2474 | self.assertEqual(double_metaphone('Bessiles', 4), ('PSLS', '')) |
||
2475 | self.assertEqual(double_metaphone('Bigelow', 4), ('PJL', 'PKLF')) |
||
2476 | self.assertEqual(double_metaphone('Bigg', 4), ('PK', '')) |
||
2477 | self.assertEqual(double_metaphone('Bigod', 4), ('PKT', '')) |
||
2478 | self.assertEqual(double_metaphone('Billings', 4), ('PLNK', '')) |
||
2479 | self.assertEqual(double_metaphone('Bimper', 4), ('PMPR', '')) |
||
2480 | self.assertEqual(double_metaphone('Binker', 4), ('PNKR', '')) |
||
2481 | self.assertEqual(double_metaphone('Birdsill', 4), ('PRTS', '')) |
||
2482 | self.assertEqual(double_metaphone('Bishop', 4), ('PXP', '')) |
||
2483 | self.assertEqual(double_metaphone('Black', 4), ('PLK', '')) |
||
2484 | self.assertEqual(double_metaphone('Blagge', 4), ('PLK', '')) |
||
2485 | self.assertEqual(double_metaphone('Blake', 4), ('PLK', '')) |
||
2486 | self.assertEqual(double_metaphone('Blanck', 4), ('PLNK', '')) |
||
2487 | self.assertEqual(double_metaphone('Bledsoe', 4), ('PLTS', '')) |
||
2488 | self.assertEqual(double_metaphone('Blennerhasset', 4), ('PLNR', '')) |
||
2489 | self.assertEqual(double_metaphone('Blessing', 4), ('PLSN', '')) |
||
2490 | self.assertEqual(double_metaphone('Blewett', 4), ('PLT', '')) |
||
2491 | self.assertEqual(double_metaphone('Bloctgoed', 4), ('PLKT', '')) |
||
2492 | self.assertEqual(double_metaphone('Bloetgoet', 4), ('PLTK', '')) |
||
2493 | self.assertEqual(double_metaphone('Bloodgood', 4), ('PLTK', '')) |
||
2494 | self.assertEqual(double_metaphone('Blossom', 4), ('PLSM', '')) |
||
2495 | self.assertEqual(double_metaphone('Blount', 4), ('PLNT', '')) |
||
2496 | self.assertEqual(double_metaphone('Bodine', 4), ('PTN', '')) |
||
2497 | self.assertEqual(double_metaphone('Bodman', 4), ('PTMN', '')) |
||
2498 | self.assertEqual(double_metaphone('BonCoeur', 4), ('PNKR', '')) |
||
2499 | self.assertEqual(double_metaphone('Bond', 4), ('PNT', '')) |
||
2500 | self.assertEqual(double_metaphone('Boscawen', 4), ('PSKN', '')) |
||
2501 | self.assertEqual(double_metaphone('Bosworth', 4), ('PSR0', 'PSRT')) |
||
2502 | self.assertEqual(double_metaphone('Bouchier', 4), ('PX', 'PKR')) |
||
2503 | self.assertEqual(double_metaphone('Bowne', 4), ('PN', '')) |
||
2504 | self.assertEqual(double_metaphone('Bradbury', 4), ('PRTP', '')) |
||
2505 | self.assertEqual(double_metaphone('Bradder', 4), ('PRTR', '')) |
||
2506 | self.assertEqual(double_metaphone('Bradford', 4), ('PRTF', '')) |
||
2507 | self.assertEqual(double_metaphone('Bradstreet', 4), ('PRTS', '')) |
||
2508 | self.assertEqual(double_metaphone('Braham', 4), ('PRHM', '')) |
||
2509 | self.assertEqual(double_metaphone('Brailsford', 4), ('PRLS', '')) |
||
2510 | self.assertEqual(double_metaphone('Brainard', 4), ('PRNR', '')) |
||
2511 | self.assertEqual(double_metaphone('Brandish', 4), ('PRNT', '')) |
||
2512 | self.assertEqual(double_metaphone('Braun', 4), ('PRN', '')) |
||
2513 | self.assertEqual(double_metaphone('Brecc', 4), ('PRK', '')) |
||
2514 | self.assertEqual(double_metaphone('Brent', 4), ('PRNT', '')) |
||
2515 | self.assertEqual(double_metaphone('Brenton', 4), ('PRNT', '')) |
||
2516 | self.assertEqual(double_metaphone('Briggs', 4), ('PRKS', '')) |
||
2517 | self.assertEqual(double_metaphone('Brigham', 4), ('PRM', '')) |
||
2518 | self.assertEqual(double_metaphone('Brobst', 4), ('PRPS', '')) |
||
2519 | self.assertEqual(double_metaphone('Brome', 4), ('PRM', '')) |
||
2520 | self.assertEqual(double_metaphone('Bronson', 4), ('PRNS', '')) |
||
2521 | self.assertEqual(double_metaphone('Brooks', 4), ('PRKS', '')) |
||
2522 | self.assertEqual(double_metaphone('Brouillard', 4), ('PRLR', '')) |
||
2523 | self.assertEqual(double_metaphone('Brown', 4), ('PRN', '')) |
||
2524 | self.assertEqual(double_metaphone('Browne', 4), ('PRN', '')) |
||
2525 | self.assertEqual(double_metaphone('Brownell', 4), ('PRNL', '')) |
||
2526 | self.assertEqual(double_metaphone('Bruley', 4), ('PRL', '')) |
||
2527 | self.assertEqual(double_metaphone('Bryant', 4), ('PRNT', '')) |
||
2528 | self.assertEqual(double_metaphone('Brzozowski', 4), ('PRSS', 'PRTS')) |
||
2529 | self.assertEqual(double_metaphone('Buide', 4), ('PT', '')) |
||
2530 | self.assertEqual(double_metaphone('Bulmer', 4), ('PLMR', '')) |
||
2531 | self.assertEqual(double_metaphone('Bunker', 4), ('PNKR', '')) |
||
2532 | self.assertEqual(double_metaphone('Burden', 4), ('PRTN', '')) |
||
2533 | self.assertEqual(double_metaphone('Burge', 4), ('PRJ', 'PRK')) |
||
2534 | self.assertEqual(double_metaphone('Burgoyne', 4), ('PRKN', '')) |
||
2535 | self.assertEqual(double_metaphone('Burke', 4), ('PRK', '')) |
||
2536 | self.assertEqual(double_metaphone('Burnett', 4), ('PRNT', '')) |
||
2537 | self.assertEqual(double_metaphone('Burpee', 4), ('PRP', '')) |
||
2538 | self.assertEqual(double_metaphone('Bursley', 4), ('PRSL', '')) |
||
2539 | self.assertEqual(double_metaphone('Burton', 4), ('PRTN', '')) |
||
2540 | self.assertEqual(double_metaphone('Bushnell', 4), ('PXNL', '')) |
||
2541 | self.assertEqual(double_metaphone('Buss', 4), ('PS', '')) |
||
2542 | self.assertEqual(double_metaphone('Buswell', 4), ('PSL', '')) |
||
2543 | self.assertEqual(double_metaphone('Butler', 4), ('PTLR', '')) |
||
2544 | self.assertEqual(double_metaphone('Calkin', 4), ('KLKN', '')) |
||
2545 | self.assertEqual(double_metaphone('Canada', 4), ('KNT', '')) |
||
2546 | self.assertEqual(double_metaphone('Canmore', 4), ('KNMR', '')) |
||
2547 | self.assertEqual(double_metaphone('Canney', 4), ('KN', '')) |
||
2548 | self.assertEqual(double_metaphone('Capet', 4), ('KPT', '')) |
||
2549 | self.assertEqual(double_metaphone('Card', 4), ('KRT', '')) |
||
2550 | self.assertEqual(double_metaphone('Carman', 4), ('KRMN', '')) |
||
2551 | self.assertEqual(double_metaphone('Carpenter', 4), ('KRPN', '')) |
||
2552 | self.assertEqual(double_metaphone('Cartwright', 4), ('KRTR', '')) |
||
2553 | self.assertEqual(double_metaphone('Casey', 4), ('KS', '')) |
||
2554 | self.assertEqual(double_metaphone('Catterfield', 4), ('KTRF', '')) |
||
2555 | self.assertEqual(double_metaphone('Ceeley', 4), ('SL', '')) |
||
2556 | self.assertEqual(double_metaphone('Chambers', 4), ('XMPR', '')) |
||
2557 | self.assertEqual(double_metaphone('Champion', 4), ('XMPN', '')) |
||
2558 | self.assertEqual(double_metaphone('Chapman', 4), ('XPMN', '')) |
||
2559 | self.assertEqual(double_metaphone('Chase', 4), ('XS', '')) |
||
2560 | self.assertEqual(double_metaphone('Cheney', 4), ('XN', '')) |
||
2561 | self.assertEqual(double_metaphone('Chetwynd', 4), ('XTNT', '')) |
||
2562 | self.assertEqual(double_metaphone('Chevalier', 4), ('XFL', 'XFLR')) |
||
2563 | self.assertEqual(double_metaphone('Chillingsworth', 4), ('XLNK', '')) |
||
2564 | self.assertEqual(double_metaphone('Christie', 4), ('KRST', '')) |
||
2565 | self.assertEqual(double_metaphone('Chubbuck', 4), ('XPK', '')) |
||
2566 | self.assertEqual(double_metaphone('Church', 4), ('XRX', 'XRK')) |
||
2567 | self.assertEqual(double_metaphone('Clark', 4), ('KLRK', '')) |
||
2568 | self.assertEqual(double_metaphone('Clarke', 4), ('KLRK', '')) |
||
2569 | self.assertEqual(double_metaphone('Cleare', 4), ('KLR', '')) |
||
2570 | self.assertEqual(double_metaphone('Clement', 4), ('KLMN', '')) |
||
2571 | self.assertEqual(double_metaphone('Clerke', 4), ('KLRK', '')) |
||
2572 | self.assertEqual(double_metaphone('Clibben', 4), ('KLPN', '')) |
||
2573 | self.assertEqual(double_metaphone('Clifford', 4), ('KLFR', '')) |
||
2574 | self.assertEqual(double_metaphone('Clivedon', 4), ('KLFT', '')) |
||
2575 | self.assertEqual(double_metaphone('Close', 4), ('KLS', '')) |
||
2576 | self.assertEqual(double_metaphone('Clothilde', 4), ('KL0L', 'KLTL')) |
||
2577 | self.assertEqual(double_metaphone('Cobb', 4), ('KP', '')) |
||
2578 | self.assertEqual(double_metaphone('Coburn', 4), ('KPRN', '')) |
||
2579 | self.assertEqual(double_metaphone('Coburne', 4), ('KPRN', '')) |
||
2580 | self.assertEqual(double_metaphone('Cocke', 4), ('KK', '')) |
||
2581 | self.assertEqual(double_metaphone('Coffin', 4), ('KFN', '')) |
||
2582 | self.assertEqual(double_metaphone('Coffyn', 4), ('KFN', '')) |
||
2583 | self.assertEqual(double_metaphone('Colborne', 4), ('KLPR', '')) |
||
2584 | self.assertEqual(double_metaphone('Colby', 4), ('KLP', '')) |
||
2585 | self.assertEqual(double_metaphone('Cole', 4), ('KL', '')) |
||
2586 | self.assertEqual(double_metaphone('Coleman', 4), ('KLMN', '')) |
||
2587 | self.assertEqual(double_metaphone('Collier', 4), ('KL', 'KLR')) |
||
2588 | self.assertEqual(double_metaphone('Compton', 4), ('KMPT', '')) |
||
2589 | self.assertEqual(double_metaphone('Cone', 4), ('KN', '')) |
||
2590 | self.assertEqual(double_metaphone('Cook', 4), ('KK', '')) |
||
2591 | self.assertEqual(double_metaphone('Cooke', 4), ('KK', '')) |
||
2592 | self.assertEqual(double_metaphone('Cooper', 4), ('KPR', '')) |
||
2593 | self.assertEqual(double_metaphone('Copperthwaite', 4), |
||
2594 | ('KPR0', 'KPRT')) |
||
2595 | self.assertEqual(double_metaphone('Corbet', 4), ('KRPT', '')) |
||
2596 | self.assertEqual(double_metaphone('Corell', 4), ('KRL', '')) |
||
2597 | self.assertEqual(double_metaphone('Corey', 4), ('KR', '')) |
||
2598 | self.assertEqual(double_metaphone('Corlies', 4), ('KRLS', '')) |
||
2599 | self.assertEqual(double_metaphone('Corneliszen', 4), ('KRNL', '')) |
||
2600 | self.assertEqual(double_metaphone('Cornelius', 4), ('KRNL', '')) |
||
2601 | self.assertEqual(double_metaphone('Cornwallis', 4), ('KRNL', '')) |
||
2602 | self.assertEqual(double_metaphone('Cosgrove', 4), ('KSKR', '')) |
||
2603 | self.assertEqual(double_metaphone('Count of Brionne', 4), ('KNTF', '')) |
||
2604 | self.assertEqual(double_metaphone('Covill', 4), ('KFL', '')) |
||
2605 | self.assertEqual(double_metaphone('Cowperthwaite', 4), |
||
2606 | ('KPR0', 'KPRT')) |
||
2607 | self.assertEqual(double_metaphone('Cowperwaite', 4), ('KPRT', '')) |
||
2608 | self.assertEqual(double_metaphone('Crane', 4), ('KRN', '')) |
||
2609 | self.assertEqual(double_metaphone('Creagmile', 4), ('KRKM', '')) |
||
2610 | self.assertEqual(double_metaphone('Crew', 4), ('KR', 'KRF')) |
||
2611 | self.assertEqual(double_metaphone('Crispin', 4), ('KRSP', '')) |
||
2612 | self.assertEqual(double_metaphone('Crocker', 4), ('KRKR', '')) |
||
2613 | self.assertEqual(double_metaphone('Crockett', 4), ('KRKT', '')) |
||
2614 | self.assertEqual(double_metaphone('Crosby', 4), ('KRSP', '')) |
||
2615 | self.assertEqual(double_metaphone('Crump', 4), ('KRMP', '')) |
||
2616 | self.assertEqual(double_metaphone('Cunningham', 4), ('KNNK', '')) |
||
2617 | self.assertEqual(double_metaphone('Curtis', 4), ('KRTS', '')) |
||
2618 | self.assertEqual(double_metaphone('Cutha', 4), ('K0', 'KT')) |
||
2619 | self.assertEqual(double_metaphone('Cutter', 4), ('KTR', '')) |
||
2620 | self.assertEqual(double_metaphone('D\'Aubigny', 4), ('TPN', 'TPKN')) |
||
2621 | self.assertEqual(double_metaphone('DAVIS', 4), ('TFS', '')) |
||
2622 | self.assertEqual(double_metaphone('Dabinott', 4), ('TPNT', '')) |
||
2623 | self.assertEqual(double_metaphone('Dacre', 4), ('TKR', '')) |
||
2624 | self.assertEqual(double_metaphone('Daggett', 4), ('TKT', '')) |
||
2625 | self.assertEqual(double_metaphone('Danvers', 4), ('TNFR', '')) |
||
2626 | self.assertEqual(double_metaphone('Darcy', 4), ('TRS', '')) |
||
2627 | self.assertEqual(double_metaphone('Davis', 4), ('TFS', '')) |
||
2628 | self.assertEqual(double_metaphone('Dawn', 4), ('TN', '')) |
||
2629 | self.assertEqual(double_metaphone('Dawson', 4), ('TSN', '')) |
||
2630 | self.assertEqual(double_metaphone('Day', 4), ('T', '')) |
||
2631 | self.assertEqual(double_metaphone('Daye', 4), ('T', '')) |
||
2632 | self.assertEqual(double_metaphone('DeGrenier', 4), ('TKRN', '')) |
||
2633 | self.assertEqual(double_metaphone('Dean', 4), ('TN', '')) |
||
2634 | self.assertEqual(double_metaphone('Deekindaugh', 4), ('TKNT', '')) |
||
2635 | self.assertEqual(double_metaphone('Dennis', 4), ('TNS', '')) |
||
2636 | self.assertEqual(double_metaphone('Denny', 4), ('TN', '')) |
||
2637 | self.assertEqual(double_metaphone('Denton', 4), ('TNTN', '')) |
||
2638 | self.assertEqual(double_metaphone('Desborough', 4), ('TSPR', '')) |
||
2639 | self.assertEqual(double_metaphone('Despenser', 4), ('TSPN', '')) |
||
2640 | self.assertEqual(double_metaphone('Deverill', 4), ('TFRL', '')) |
||
2641 | self.assertEqual(double_metaphone('Devine', 4), ('TFN', '')) |
||
2642 | self.assertEqual(double_metaphone('Dexter', 4), ('TKST', '')) |
||
2643 | self.assertEqual(double_metaphone('Dillaway', 4), ('TL', '')) |
||
2644 | self.assertEqual(double_metaphone('Dimmick', 4), ('TMK', '')) |
||
2645 | self.assertEqual(double_metaphone('Dinan', 4), ('TNN', '')) |
||
2646 | self.assertEqual(double_metaphone('Dix', 4), ('TKS', '')) |
||
2647 | self.assertEqual(double_metaphone('Doggett', 4), ('TKT', '')) |
||
2648 | self.assertEqual(double_metaphone('Donahue', 4), ('TNH', '')) |
||
2649 | self.assertEqual(double_metaphone('Dorfman', 4), ('TRFM', '')) |
||
2650 | self.assertEqual(double_metaphone('Dorris', 4), ('TRS', '')) |
||
2651 | self.assertEqual(double_metaphone('Dow', 4), ('T', 'TF')) |
||
2652 | self.assertEqual(double_metaphone('Downey', 4), ('TN', '')) |
||
2653 | self.assertEqual(double_metaphone('Downing', 4), ('TNNK', '')) |
||
2654 | self.assertEqual(double_metaphone('Dowsett', 4), ('TST', '')) |
||
2655 | self.assertEqual(double_metaphone('Duck?', 4), ('TK', '')) |
||
2656 | self.assertEqual(double_metaphone('Dudley', 4), ('TTL', '')) |
||
2657 | self.assertEqual(double_metaphone('Duffy', 4), ('TF', '')) |
||
2658 | self.assertEqual(double_metaphone('Dunn', 4), ('TN', '')) |
||
2659 | self.assertEqual(double_metaphone('Dunsterville', 4), ('TNST', '')) |
||
2660 | self.assertEqual(double_metaphone('Durrant', 4), ('TRNT', '')) |
||
2661 | self.assertEqual(double_metaphone('Durrin', 4), ('TRN', '')) |
||
2662 | self.assertEqual(double_metaphone('Dustin', 4), ('TSTN', '')) |
||
2663 | self.assertEqual(double_metaphone('Duston', 4), ('TSTN', '')) |
||
2664 | self.assertEqual(double_metaphone('Eames', 4), ('AMS', '')) |
||
2665 | self.assertEqual(double_metaphone('Early', 4), ('ARL', '')) |
||
2666 | self.assertEqual(double_metaphone('Easty', 4), ('AST', '')) |
||
2667 | self.assertEqual(double_metaphone('Ebbett', 4), ('APT', '')) |
||
2668 | self.assertEqual(double_metaphone('Eberbach', 4), ('APRP', '')) |
||
2669 | self.assertEqual(double_metaphone('Eberhard', 4), ('APRR', '')) |
||
2670 | self.assertEqual(double_metaphone('Eddy', 4), ('AT', '')) |
||
2671 | self.assertEqual(double_metaphone('Edenden', 4), ('ATNT', '')) |
||
2672 | self.assertEqual(double_metaphone('Edwards', 4), ('ATRT', '')) |
||
2673 | self.assertEqual(double_metaphone('Eglinton', 4), ('AKLN', 'ALNT')) |
||
2674 | self.assertEqual(double_metaphone('Eliot', 4), ('ALT', '')) |
||
2675 | self.assertEqual(double_metaphone('Elizabeth', 4), ('ALSP', '')) |
||
2676 | self.assertEqual(double_metaphone('Ellis', 4), ('ALS', '')) |
||
2677 | self.assertEqual(double_metaphone('Ellison', 4), ('ALSN', '')) |
||
2678 | self.assertEqual(double_metaphone('Ellot', 4), ('ALT', '')) |
||
2679 | self.assertEqual(double_metaphone('Elny', 4), ('ALN', '')) |
||
2680 | self.assertEqual(double_metaphone('Elsner', 4), ('ALSN', '')) |
||
2681 | self.assertEqual(double_metaphone('Emerson', 4), ('AMRS', '')) |
||
2682 | self.assertEqual(double_metaphone('Empson', 4), ('AMPS', '')) |
||
2683 | self.assertEqual(double_metaphone('Est', 4), ('AST', '')) |
||
2684 | self.assertEqual(double_metaphone('Estabrook', 4), ('ASTP', '')) |
||
2685 | self.assertEqual(double_metaphone('Estes', 4), ('ASTS', '')) |
||
2686 | self.assertEqual(double_metaphone('Estey', 4), ('AST', '')) |
||
2687 | self.assertEqual(double_metaphone('Evans', 4), ('AFNS', '')) |
||
2688 | self.assertEqual(double_metaphone('Fallowell', 4), ('FLL', '')) |
||
2689 | self.assertEqual(double_metaphone('Farnsworth', 4), ('FRNS', '')) |
||
2690 | self.assertEqual(double_metaphone('Feake', 4), ('FK', '')) |
||
2691 | self.assertEqual(double_metaphone('Feke', 4), ('FK', '')) |
||
2692 | self.assertEqual(double_metaphone('Fellows', 4), ('FLS', '')) |
||
2693 | self.assertEqual(double_metaphone('Fettiplace', 4), ('FTPL', '')) |
||
2694 | self.assertEqual(double_metaphone('Finney', 4), ('FN', '')) |
||
2695 | self.assertEqual(double_metaphone('Fischer', 4), ('FXR', 'FSKR')) |
||
2696 | self.assertEqual(double_metaphone('Fisher', 4), ('FXR', '')) |
||
2697 | self.assertEqual(double_metaphone('Fisk', 4), ('FSK', '')) |
||
2698 | self.assertEqual(double_metaphone('Fiske', 4), ('FSK', '')) |
||
2699 | self.assertEqual(double_metaphone('Fletcher', 4), ('FLXR', '')) |
||
2700 | self.assertEqual(double_metaphone('Folger', 4), ('FLKR', 'FLJR')) |
||
2701 | self.assertEqual(double_metaphone('Foliot', 4), ('FLT', '')) |
||
2702 | self.assertEqual(double_metaphone('Folyot', 4), ('FLT', '')) |
||
2703 | self.assertEqual(double_metaphone('Fones', 4), ('FNS', '')) |
||
2704 | self.assertEqual(double_metaphone('Fordham', 4), ('FRTM', '')) |
||
2705 | self.assertEqual(double_metaphone('Forstner', 4), ('FRST', '')) |
||
2706 | self.assertEqual(double_metaphone('Fosten', 4), ('FSTN', '')) |
||
2707 | self.assertEqual(double_metaphone('Foster', 4), ('FSTR', '')) |
||
2708 | self.assertEqual(double_metaphone('Foulke', 4), ('FLK', '')) |
||
2709 | self.assertEqual(double_metaphone('Fowler', 4), ('FLR', '')) |
||
2710 | self.assertEqual(double_metaphone('Foxwell', 4), ('FKSL', '')) |
||
2711 | self.assertEqual(double_metaphone('Fraley', 4), ('FRL', '')) |
||
2712 | self.assertEqual(double_metaphone('Franceys', 4), ('FRNS', '')) |
||
2713 | self.assertEqual(double_metaphone('Franke', 4), ('FRNK', '')) |
||
2714 | self.assertEqual(double_metaphone('Frascella', 4), ('FRSL', '')) |
||
2715 | self.assertEqual(double_metaphone('Frazer', 4), ('FRSR', '')) |
||
2716 | self.assertEqual(double_metaphone('Fredd', 4), ('FRT', '')) |
||
2717 | self.assertEqual(double_metaphone('Freeman', 4), ('FRMN', '')) |
||
2718 | self.assertEqual(double_metaphone('French', 4), ('FRNX', 'FRNK')) |
||
2719 | self.assertEqual(double_metaphone('Freville', 4), ('FRFL', '')) |
||
2720 | self.assertEqual(double_metaphone('Frey', 4), ('FR', '')) |
||
2721 | self.assertEqual(double_metaphone('Frick', 4), ('FRK', '')) |
||
2722 | self.assertEqual(double_metaphone('Frier', 4), ('FR', 'FRR')) |
||
2723 | self.assertEqual(double_metaphone('Froe', 4), ('FR', '')) |
||
2724 | self.assertEqual(double_metaphone('Frorer', 4), ('FRRR', '')) |
||
2725 | self.assertEqual(double_metaphone('Frost', 4), ('FRST', '')) |
||
2726 | self.assertEqual(double_metaphone('Frothingham', 4), ('FR0N', 'FRTN')) |
||
2727 | self.assertEqual(double_metaphone('Fry', 4), ('FR', '')) |
||
2728 | self.assertEqual(double_metaphone('Gaffney', 4), ('KFN', '')) |
||
2729 | self.assertEqual(double_metaphone('Gage', 4), ('KJ', 'KK')) |
||
2730 | self.assertEqual(double_metaphone('Gallion', 4), ('KLN', '')) |
||
2731 | self.assertEqual(double_metaphone('Gallishan', 4), ('KLXN', '')) |
||
2732 | self.assertEqual(double_metaphone('Gamble', 4), ('KMPL', '')) |
||
2733 | self.assertEqual(double_metaphone('Garbrand', 4), ('KRPR', '')) |
||
2734 | self.assertEqual(double_metaphone('Gardner', 4), ('KRTN', '')) |
||
2735 | self.assertEqual(double_metaphone('Garrett', 4), ('KRT', '')) |
||
2736 | self.assertEqual(double_metaphone('Gassner', 4), ('KSNR', '')) |
||
2737 | self.assertEqual(double_metaphone('Gater', 4), ('KTR', '')) |
||
2738 | self.assertEqual(double_metaphone('Gaunt', 4), ('KNT', '')) |
||
2739 | self.assertEqual(double_metaphone('Gayer', 4), ('KR', '')) |
||
2740 | self.assertEqual(double_metaphone('Gerken', 4), ('KRKN', 'JRKN')) |
||
2741 | self.assertEqual(double_metaphone('Gerritsen', 4), ('KRTS', 'JRTS')) |
||
2742 | self.assertEqual(double_metaphone('Gibbs', 4), ('KPS', 'JPS')) |
||
2743 | self.assertEqual(double_metaphone('Giffard', 4), ('JFRT', 'KFRT')) |
||
2744 | self.assertEqual(double_metaphone('Gilbert', 4), ('KLPR', 'JLPR')) |
||
2745 | self.assertEqual(double_metaphone('Gill', 4), ('KL', 'JL')) |
||
2746 | self.assertEqual(double_metaphone('Gilman', 4), ('KLMN', 'JLMN')) |
||
2747 | self.assertEqual(double_metaphone('Glass', 4), ('KLS', '')) |
||
2748 | self.assertEqual(double_metaphone('GoddardGifford', 4), ('KTRJ', '')) |
||
2749 | self.assertEqual(double_metaphone('Godfrey', 4), ('KTFR', '')) |
||
2750 | self.assertEqual(double_metaphone('Godwin', 4), ('KTN', '')) |
||
2751 | self.assertEqual(double_metaphone('Goodale', 4), ('KTL', '')) |
||
2752 | self.assertEqual(double_metaphone('Goodnow', 4), ('KTN', 'KTNF')) |
||
2753 | self.assertEqual(double_metaphone('Gorham', 4), ('KRM', '')) |
||
2754 | self.assertEqual(double_metaphone('Goseline', 4), ('KSLN', '')) |
||
2755 | self.assertEqual(double_metaphone('Gott', 4), ('KT', '')) |
||
2756 | self.assertEqual(double_metaphone('Gould', 4), ('KLT', '')) |
||
2757 | self.assertEqual(double_metaphone('Grafton', 4), ('KRFT', '')) |
||
2758 | self.assertEqual(double_metaphone('Grant', 4), ('KRNT', '')) |
||
2759 | self.assertEqual(double_metaphone('Gray', 4), ('KR', '')) |
||
2760 | self.assertEqual(double_metaphone('Green', 4), ('KRN', '')) |
||
2761 | self.assertEqual(double_metaphone('Griffin', 4), ('KRFN', '')) |
||
2762 | self.assertEqual(double_metaphone('Grill', 4), ('KRL', '')) |
||
2763 | self.assertEqual(double_metaphone('Grim', 4), ('KRM', '')) |
||
2764 | self.assertEqual(double_metaphone('Grisgonelle', 4), ('KRSK', '')) |
||
2765 | self.assertEqual(double_metaphone('Gross', 4), ('KRS', '')) |
||
2766 | self.assertEqual(double_metaphone('Guba', 4), ('KP', '')) |
||
2767 | self.assertEqual(double_metaphone('Gybbes', 4), ('KPS', 'JPS')) |
||
2768 | self.assertEqual(double_metaphone('Haburne', 4), ('HPRN', '')) |
||
2769 | self.assertEqual(double_metaphone('Hackburne', 4), ('HKPR', '')) |
||
2770 | self.assertEqual(double_metaphone('Haddon?', 4), ('HTN', '')) |
||
2771 | self.assertEqual(double_metaphone('Haines', 4), ('HNS', '')) |
||
2772 | self.assertEqual(double_metaphone('Hale', 4), ('HL', '')) |
||
2773 | self.assertEqual(double_metaphone('Hall', 4), ('HL', '')) |
||
2774 | self.assertEqual(double_metaphone('Hallet', 4), ('HLT', '')) |
||
2775 | self.assertEqual(double_metaphone('Hallock', 4), ('HLK', '')) |
||
2776 | self.assertEqual(double_metaphone('Halstead', 4), ('HLST', '')) |
||
2777 | self.assertEqual(double_metaphone('Hammond', 4), ('HMNT', '')) |
||
2778 | self.assertEqual(double_metaphone('Hance', 4), ('HNS', '')) |
||
2779 | self.assertEqual(double_metaphone('Handy', 4), ('HNT', '')) |
||
2780 | self.assertEqual(double_metaphone('Hanson', 4), ('HNSN', '')) |
||
2781 | self.assertEqual(double_metaphone('Harasek', 4), ('HRSK', '')) |
||
2782 | self.assertEqual(double_metaphone('Harcourt', 4), ('HRKR', '')) |
||
2783 | self.assertEqual(double_metaphone('Hardy', 4), ('HRT', '')) |
||
2784 | self.assertEqual(double_metaphone('Harlock', 4), ('HRLK', '')) |
||
2785 | self.assertEqual(double_metaphone('Harris', 4), ('HRS', '')) |
||
2786 | self.assertEqual(double_metaphone('Hartley', 4), ('HRTL', '')) |
||
2787 | self.assertEqual(double_metaphone('Harvey', 4), ('HRF', '')) |
||
2788 | self.assertEqual(double_metaphone('Harvie', 4), ('HRF', '')) |
||
2789 | self.assertEqual(double_metaphone('Harwood', 4), ('HRT', '')) |
||
2790 | self.assertEqual(double_metaphone('Hathaway', 4), ('H0', 'HT')) |
||
2791 | self.assertEqual(double_metaphone('Haukeness', 4), ('HKNS', '')) |
||
2792 | self.assertEqual(double_metaphone('Hawkes', 4), ('HKS', '')) |
||
2793 | self.assertEqual(double_metaphone('Hawkhurst', 4), ('HKRS', '')) |
||
2794 | self.assertEqual(double_metaphone('Hawkins', 4), ('HKNS', '')) |
||
2795 | self.assertEqual(double_metaphone('Hawley', 4), ('HL', '')) |
||
2796 | self.assertEqual(double_metaphone('Heald', 4), ('HLT', '')) |
||
2797 | self.assertEqual(double_metaphone('Helsdon', 4), ('HLST', '')) |
||
2798 | self.assertEqual(double_metaphone('Hemenway', 4), ('HMN', '')) |
||
2799 | self.assertEqual(double_metaphone('Hemmenway', 4), ('HMN', '')) |
||
2800 | self.assertEqual(double_metaphone('Henck', 4), ('HNK', '')) |
||
2801 | self.assertEqual(double_metaphone('Henderson', 4), ('HNTR', '')) |
||
2802 | self.assertEqual(double_metaphone('Hendricks', 4), ('HNTR', '')) |
||
2803 | self.assertEqual(double_metaphone('Hersey', 4), ('HRS', '')) |
||
2804 | self.assertEqual(double_metaphone('Hewes', 4), ('HS', '')) |
||
2805 | self.assertEqual(double_metaphone('Heyman', 4), ('HMN', '')) |
||
2806 | self.assertEqual(double_metaphone('Hicks', 4), ('HKS', '')) |
||
2807 | self.assertEqual(double_metaphone('Hidden', 4), ('HTN', '')) |
||
2808 | self.assertEqual(double_metaphone('Higgs', 4), ('HKS', '')) |
||
2809 | self.assertEqual(double_metaphone('Hill', 4), ('HL', '')) |
||
2810 | self.assertEqual(double_metaphone('Hills', 4), ('HLS', '')) |
||
2811 | self.assertEqual(double_metaphone('Hinckley', 4), ('HNKL', '')) |
||
2812 | self.assertEqual(double_metaphone('Hipwell', 4), ('HPL', '')) |
||
2813 | self.assertEqual(double_metaphone('Hobart', 4), ('HPRT', '')) |
||
2814 | self.assertEqual(double_metaphone('Hoben', 4), ('HPN', '')) |
||
2815 | self.assertEqual(double_metaphone('Hoffmann', 4), ('HFMN', '')) |
||
2816 | self.assertEqual(double_metaphone('Hogan', 4), ('HKN', '')) |
||
2817 | self.assertEqual(double_metaphone('Holmes', 4), ('HLMS', '')) |
||
2818 | self.assertEqual(double_metaphone('Hoo', 4), ('H', '')) |
||
2819 | self.assertEqual(double_metaphone('Hooker', 4), ('HKR', '')) |
||
2820 | self.assertEqual(double_metaphone('Hopcott', 4), ('HPKT', '')) |
||
2821 | self.assertEqual(double_metaphone('Hopkins', 4), ('HPKN', '')) |
||
2822 | self.assertEqual(double_metaphone('Hopkinson', 4), ('HPKN', '')) |
||
2823 | self.assertEqual(double_metaphone('Hornsey', 4), ('HRNS', '')) |
||
2824 | self.assertEqual(double_metaphone('Houckgeest', 4), ('HKJS', 'HKKS')) |
||
2825 | self.assertEqual(double_metaphone('Hough', 4), ('H', '')) |
||
2826 | self.assertEqual(double_metaphone('Houstin', 4), ('HSTN', '')) |
||
2827 | self.assertEqual(double_metaphone('How', 4), ('H', 'HF')) |
||
2828 | self.assertEqual(double_metaphone('Howe', 4), ('H', '')) |
||
2829 | self.assertEqual(double_metaphone('Howland', 4), ('HLNT', '')) |
||
2830 | self.assertEqual(double_metaphone('Hubner', 4), ('HPNR', '')) |
||
2831 | self.assertEqual(double_metaphone('Hudnut', 4), ('HTNT', '')) |
||
2832 | self.assertEqual(double_metaphone('Hughes', 4), ('HS', '')) |
||
2833 | self.assertEqual(double_metaphone('Hull', 4), ('HL', '')) |
||
2834 | self.assertEqual(double_metaphone('Hulme', 4), ('HLM', '')) |
||
2835 | self.assertEqual(double_metaphone('Hume', 4), ('HM', '')) |
||
2836 | self.assertEqual(double_metaphone('Hundertumark', 4), ('HNTR', '')) |
||
2837 | self.assertEqual(double_metaphone('Hundley', 4), ('HNTL', '')) |
||
2838 | self.assertEqual(double_metaphone('Hungerford', 4), ('HNKR', 'HNJR')) |
||
2839 | self.assertEqual(double_metaphone('Hunt', 4), ('HNT', '')) |
||
2840 | self.assertEqual(double_metaphone('Hurst', 4), ('HRST', '')) |
||
2841 | self.assertEqual(double_metaphone('Husbands', 4), ('HSPN', '')) |
||
2842 | self.assertEqual(double_metaphone('Hussey', 4), ('HS', '')) |
||
2843 | self.assertEqual(double_metaphone('Husted', 4), ('HSTT', '')) |
||
2844 | self.assertEqual(double_metaphone('Hutchins', 4), ('HXNS', '')) |
||
2845 | self.assertEqual(double_metaphone('Hutchinson', 4), ('HXNS', '')) |
||
2846 | self.assertEqual(double_metaphone('Huttinger', 4), ('HTNK', 'HTNJ')) |
||
2847 | self.assertEqual(double_metaphone('Huybertsen', 4), ('HPRT', '')) |
||
2848 | self.assertEqual(double_metaphone('Iddenden', 4), ('ATNT', '')) |
||
2849 | self.assertEqual(double_metaphone('Ingraham', 4), ('ANKR', '')) |
||
2850 | self.assertEqual(double_metaphone('Ives', 4), ('AFS', '')) |
||
2851 | self.assertEqual(double_metaphone('Jackson', 4), ('JKSN', 'AKSN')) |
||
2852 | self.assertEqual(double_metaphone('Jacob', 4), ('JKP', 'AKP')) |
||
2853 | self.assertEqual(double_metaphone('Jans', 4), ('JNS', 'ANS')) |
||
2854 | self.assertEqual(double_metaphone('Jenkins', 4), ('JNKN', 'ANKN')) |
||
2855 | self.assertEqual(double_metaphone('Jewett', 4), ('JT', 'AT')) |
||
2856 | self.assertEqual(double_metaphone('Jewitt', 4), ('JT', 'AT')) |
||
2857 | self.assertEqual(double_metaphone('Johnson', 4), ('JNSN', 'ANSN')) |
||
2858 | self.assertEqual(double_metaphone('Jones', 4), ('JNS', 'ANS')) |
||
2859 | self.assertEqual(double_metaphone('Josephine', 4), ('JSFN', 'HSFN')) |
||
2860 | self.assertEqual(double_metaphone('Judd', 4), ('JT', 'AT')) |
||
2861 | self.assertEqual(double_metaphone('June', 4), ('JN', 'AN')) |
||
2862 | self.assertEqual(double_metaphone('Kamarowska', 4), ('KMRS', '')) |
||
2863 | self.assertEqual(double_metaphone('Kay', 4), ('K', '')) |
||
2864 | self.assertEqual(double_metaphone('Kelley', 4), ('KL', '')) |
||
2865 | self.assertEqual(double_metaphone('Kelly', 4), ('KL', '')) |
||
2866 | self.assertEqual(double_metaphone('Keymber', 4), ('KMPR', '')) |
||
2867 | self.assertEqual(double_metaphone('Keynes', 4), ('KNS', '')) |
||
2868 | self.assertEqual(double_metaphone('Kilham', 4), ('KLM', '')) |
||
2869 | self.assertEqual(double_metaphone('Kim', 4), ('KM', '')) |
||
2870 | self.assertEqual(double_metaphone('Kimball', 4), ('KMPL', '')) |
||
2871 | self.assertEqual(double_metaphone('King', 4), ('KNK', '')) |
||
2872 | self.assertEqual(double_metaphone('Kinsey', 4), ('KNS', '')) |
||
2873 | self.assertEqual(double_metaphone('Kirk', 4), ('KRK', '')) |
||
2874 | self.assertEqual(double_metaphone('Kirton', 4), ('KRTN', '')) |
||
2875 | self.assertEqual(double_metaphone('Kistler', 4), ('KSTL', '')) |
||
2876 | self.assertEqual(double_metaphone('Kitchen', 4), ('KXN', '')) |
||
2877 | self.assertEqual(double_metaphone('Kitson', 4), ('KTSN', '')) |
||
2878 | self.assertEqual(double_metaphone('Klett', 4), ('KLT', '')) |
||
2879 | self.assertEqual(double_metaphone('Kline', 4), ('KLN', '')) |
||
2880 | self.assertEqual(double_metaphone('Knapp', 4), ('NP', '')) |
||
2881 | self.assertEqual(double_metaphone('Knight', 4), ('NT', '')) |
||
2882 | self.assertEqual(double_metaphone('Knote', 4), ('NT', '')) |
||
2883 | self.assertEqual(double_metaphone('Knott', 4), ('NT', '')) |
||
2884 | self.assertEqual(double_metaphone('Knox', 4), ('NKS', '')) |
||
2885 | self.assertEqual(double_metaphone('Koeller', 4), ('KLR', '')) |
||
2886 | self.assertEqual(double_metaphone('La Pointe', 4), ('LPNT', '')) |
||
2887 | self.assertEqual(double_metaphone('LaPlante', 4), ('LPLN', '')) |
||
2888 | self.assertEqual(double_metaphone('Laimbeer', 4), ('LMPR', '')) |
||
2889 | self.assertEqual(double_metaphone('Lamb', 4), ('LMP', '')) |
||
2890 | self.assertEqual(double_metaphone('Lambertson', 4), ('LMPR', '')) |
||
2891 | self.assertEqual(double_metaphone('Lancto', 4), ('LNKT', '')) |
||
2892 | self.assertEqual(double_metaphone('Landry', 4), ('LNTR', '')) |
||
2893 | self.assertEqual(double_metaphone('Lane', 4), ('LN', '')) |
||
2894 | self.assertEqual(double_metaphone('Langendyck', 4), ('LNJN', 'LNKN')) |
||
2895 | self.assertEqual(double_metaphone('Langer', 4), ('LNKR', 'LNJR')) |
||
2896 | self.assertEqual(double_metaphone('Langford', 4), ('LNKF', '')) |
||
2897 | self.assertEqual(double_metaphone('Lantersee', 4), ('LNTR', '')) |
||
2898 | self.assertEqual(double_metaphone('Laquer', 4), ('LKR', '')) |
||
2899 | self.assertEqual(double_metaphone('Larkin', 4), ('LRKN', '')) |
||
2900 | self.assertEqual(double_metaphone('Latham', 4), ('LTM', '')) |
||
2901 | self.assertEqual(double_metaphone('Lathrop', 4), ('L0RP', 'LTRP')) |
||
2902 | self.assertEqual(double_metaphone('Lauter', 4), ('LTR', '')) |
||
2903 | self.assertEqual(double_metaphone('Lawrence', 4), ('LRNS', '')) |
||
2904 | self.assertEqual(double_metaphone('Leach', 4), ('LK', '')) |
||
2905 | self.assertEqual(double_metaphone('Leager', 4), ('LKR', 'LJR')) |
||
2906 | self.assertEqual(double_metaphone('Learned', 4), ('LRNT', '')) |
||
2907 | self.assertEqual(double_metaphone('Leavitt', 4), ('LFT', '')) |
||
2908 | self.assertEqual(double_metaphone('Lee', 4), ('L', '')) |
||
2909 | self.assertEqual(double_metaphone('Leete', 4), ('LT', '')) |
||
2910 | self.assertEqual(double_metaphone('Leggett', 4), ('LKT', '')) |
||
2911 | self.assertEqual(double_metaphone('Leland', 4), ('LLNT', '')) |
||
2912 | self.assertEqual(double_metaphone('Leonard', 4), ('LNRT', '')) |
||
2913 | self.assertEqual(double_metaphone('Lester', 4), ('LSTR', '')) |
||
2914 | self.assertEqual(double_metaphone('Lestrange', 4), ('LSTR', '')) |
||
2915 | self.assertEqual(double_metaphone('Lethem', 4), ('L0M', 'LTM')) |
||
2916 | self.assertEqual(double_metaphone('Levine', 4), ('LFN', '')) |
||
2917 | self.assertEqual(double_metaphone('Lewes', 4), ('LS', '')) |
||
2918 | self.assertEqual(double_metaphone('Lewis', 4), ('LS', '')) |
||
2919 | self.assertEqual(double_metaphone('Lincoln', 4), ('LNKL', '')) |
||
2920 | self.assertEqual(double_metaphone('Lindsey', 4), ('LNTS', '')) |
||
2921 | self.assertEqual(double_metaphone('Linher', 4), ('LNR', '')) |
||
2922 | self.assertEqual(double_metaphone('Lippet', 4), ('LPT', '')) |
||
2923 | self.assertEqual(double_metaphone('Lippincott', 4), ('LPNK', '')) |
||
2924 | self.assertEqual(double_metaphone('Lockwood', 4), ('LKT', '')) |
||
2925 | self.assertEqual(double_metaphone('Loines', 4), ('LNS', '')) |
||
2926 | self.assertEqual(double_metaphone('Lombard', 4), ('LMPR', '')) |
||
2927 | self.assertEqual(double_metaphone('Long', 4), ('LNK', '')) |
||
2928 | self.assertEqual(double_metaphone('Longespee', 4), ('LNJS', 'LNKS')) |
||
2929 | self.assertEqual(double_metaphone('Look', 4), ('LK', '')) |
||
2930 | self.assertEqual(double_metaphone('Lounsberry', 4), ('LNSP', '')) |
||
2931 | self.assertEqual(double_metaphone('Lounsbury', 4), ('LNSP', '')) |
||
2932 | self.assertEqual(double_metaphone('Louthe', 4), ('L0', 'LT')) |
||
2933 | self.assertEqual(double_metaphone('Loveyne', 4), ('LFN', '')) |
||
2934 | self.assertEqual(double_metaphone('Lowe', 4), ('L', '')) |
||
2935 | self.assertEqual(double_metaphone('Ludlam', 4), ('LTLM', '')) |
||
2936 | self.assertEqual(double_metaphone('Lumbard', 4), ('LMPR', '')) |
||
2937 | self.assertEqual(double_metaphone('Lund', 4), ('LNT', '')) |
||
2938 | self.assertEqual(double_metaphone('Luno', 4), ('LN', '')) |
||
2939 | self.assertEqual(double_metaphone('Lutz', 4), ('LTS', '')) |
||
2940 | self.assertEqual(double_metaphone('Lydia', 4), ('LT', '')) |
||
2941 | self.assertEqual(double_metaphone('Lynne', 4), ('LN', '')) |
||
2942 | self.assertEqual(double_metaphone('Lyon', 4), ('LN', '')) |
||
2943 | self.assertEqual(double_metaphone('MacAlpin', 4), ('MKLP', '')) |
||
2944 | self.assertEqual(double_metaphone('MacBricc', 4), ('MKPR', '')) |
||
2945 | self.assertEqual(double_metaphone('MacCrinan', 4), ('MKRN', '')) |
||
2946 | self.assertEqual(double_metaphone('MacKenneth', 4), ('MKN0', 'MKNT')) |
||
2947 | self.assertEqual(double_metaphone('MacMael nam Bo', 4), ('MKML', '')) |
||
2948 | self.assertEqual(double_metaphone('MacMurchada', 4), ('MKMR', '')) |
||
2949 | self.assertEqual(double_metaphone('Macomber', 4), ('MKMP', '')) |
||
2950 | self.assertEqual(double_metaphone('Macy', 4), ('MS', '')) |
||
2951 | self.assertEqual(double_metaphone('Magnus', 4), ('MNS', 'MKNS')) |
||
2952 | self.assertEqual(double_metaphone('Mahien', 4), ('MHN', '')) |
||
2953 | self.assertEqual(double_metaphone('Malmains', 4), ('MLMN', '')) |
||
2954 | self.assertEqual(double_metaphone('Malory', 4), ('MLR', '')) |
||
2955 | self.assertEqual(double_metaphone('Mancinelli', 4), ('MNSN', '')) |
||
2956 | self.assertEqual(double_metaphone('Mancini', 4), ('MNSN', '')) |
||
2957 | self.assertEqual(double_metaphone('Mann', 4), ('MN', '')) |
||
2958 | self.assertEqual(double_metaphone('Manning', 4), ('MNNK', '')) |
||
2959 | self.assertEqual(double_metaphone('Manter', 4), ('MNTR', '')) |
||
2960 | self.assertEqual(double_metaphone('Marion', 4), ('MRN', '')) |
||
2961 | self.assertEqual(double_metaphone('Marley', 4), ('MRL', '')) |
||
2962 | self.assertEqual(double_metaphone('Marmion', 4), ('MRMN', '')) |
||
2963 | self.assertEqual(double_metaphone('Marquart', 4), ('MRKR', '')) |
||
2964 | self.assertEqual(double_metaphone('Marsh', 4), ('MRX', '')) |
||
2965 | self.assertEqual(double_metaphone('Marshal', 4), ('MRXL', '')) |
||
2966 | self.assertEqual(double_metaphone('Marshall', 4), ('MRXL', '')) |
||
2967 | self.assertEqual(double_metaphone('Martel', 4), ('MRTL', '')) |
||
2968 | self.assertEqual(double_metaphone('Martha', 4), ('MR0', 'MRT')) |
||
2969 | self.assertEqual(double_metaphone('Martin', 4), ('MRTN', '')) |
||
2970 | self.assertEqual(double_metaphone('Marturano', 4), ('MRTR', '')) |
||
2971 | self.assertEqual(double_metaphone('Marvin', 4), ('MRFN', '')) |
||
2972 | self.assertEqual(double_metaphone('Mary', 4), ('MR', '')) |
||
2973 | self.assertEqual(double_metaphone('Mason', 4), ('MSN', '')) |
||
2974 | self.assertEqual(double_metaphone('Maxwell', 4), ('MKSL', '')) |
||
2975 | self.assertEqual(double_metaphone('Mayhew', 4), ('MH', 'MHF')) |
||
2976 | self.assertEqual(double_metaphone('McAllaster', 4), ('MKLS', '')) |
||
2977 | self.assertEqual(double_metaphone('McAllister', 4), ('MKLS', '')) |
||
2978 | self.assertEqual(double_metaphone('McConnell', 4), ('MKNL', '')) |
||
2979 | self.assertEqual(double_metaphone('McFarland', 4), ('MKFR', '')) |
||
2980 | self.assertEqual(double_metaphone('McIlroy', 4), ('MSLR', '')) |
||
2981 | self.assertEqual(double_metaphone('McNair', 4), ('MKNR', '')) |
||
2982 | self.assertEqual(double_metaphone('McNair-Landry', 4), ('MKNR', '')) |
||
2983 | self.assertEqual(double_metaphone('McRaven', 4), ('MKRF', '')) |
||
2984 | self.assertEqual(double_metaphone('Mead', 4), ('MT', '')) |
||
2985 | self.assertEqual(double_metaphone('Meade', 4), ('MT', '')) |
||
2986 | self.assertEqual(double_metaphone('Meck', 4), ('MK', '')) |
||
2987 | self.assertEqual(double_metaphone('Melton', 4), ('MLTN', '')) |
||
2988 | self.assertEqual(double_metaphone('Mendenhall', 4), ('MNTN', '')) |
||
2989 | self.assertEqual(double_metaphone('Mering', 4), ('MRNK', '')) |
||
2990 | self.assertEqual(double_metaphone('Merrick', 4), ('MRK', '')) |
||
2991 | self.assertEqual(double_metaphone('Merry', 4), ('MR', '')) |
||
2992 | self.assertEqual(double_metaphone('Mighill', 4), ('ML', '')) |
||
2993 | self.assertEqual(double_metaphone('Miller', 4), ('MLR', '')) |
||
2994 | self.assertEqual(double_metaphone('Milton', 4), ('MLTN', '')) |
||
2995 | self.assertEqual(double_metaphone('Mohun', 4), ('MHN', '')) |
||
2996 | self.assertEqual(double_metaphone('Montague', 4), ('MNTK', '')) |
||
2997 | self.assertEqual(double_metaphone('Montboucher', 4), ('MNTP', '')) |
||
2998 | self.assertEqual(double_metaphone('Moore', 4), ('MR', '')) |
||
2999 | self.assertEqual(double_metaphone('Morrel', 4), ('MRL', '')) |
||
3000 | self.assertEqual(double_metaphone('Morrill', 4), ('MRL', '')) |
||
3001 | self.assertEqual(double_metaphone('Morris', 4), ('MRS', '')) |
||
3002 | self.assertEqual(double_metaphone('Morton', 4), ('MRTN', '')) |
||
3003 | self.assertEqual(double_metaphone('Moton', 4), ('MTN', '')) |
||
3004 | self.assertEqual(double_metaphone('Muir', 4), ('MR', '')) |
||
3005 | self.assertEqual(double_metaphone('Mulferd', 4), ('MLFR', '')) |
||
3006 | self.assertEqual(double_metaphone('Mullins', 4), ('MLNS', '')) |
||
3007 | self.assertEqual(double_metaphone('Mulso', 4), ('MLS', '')) |
||
3008 | self.assertEqual(double_metaphone('Munger', 4), ('MNKR', 'MNJR')) |
||
3009 | self.assertEqual(double_metaphone('Munt', 4), ('MNT', '')) |
||
3010 | self.assertEqual(double_metaphone('Murchad', 4), ('MRXT', 'MRKT')) |
||
3011 | self.assertEqual(double_metaphone('Murdock', 4), ('MRTK', '')) |
||
3012 | self.assertEqual(double_metaphone('Murray', 4), ('MR', '')) |
||
3013 | self.assertEqual(double_metaphone('Muskett', 4), ('MSKT', '')) |
||
3014 | self.assertEqual(double_metaphone('Myers', 4), ('MRS', '')) |
||
3015 | self.assertEqual(double_metaphone('Myrick', 4), ('MRK', '')) |
||
3016 | self.assertEqual(double_metaphone('NORRIS', 4), ('NRS', '')) |
||
3017 | self.assertEqual(double_metaphone('Nayle', 4), ('NL', '')) |
||
3018 | self.assertEqual(double_metaphone('Newcomb', 4), ('NKMP', '')) |
||
3019 | self.assertEqual(double_metaphone('Newcomb(e)', 4), ('NKMP', '')) |
||
3020 | self.assertEqual(double_metaphone('Newkirk', 4), ('NKRK', '')) |
||
3021 | self.assertEqual(double_metaphone('Newton', 4), ('NTN', '')) |
||
3022 | self.assertEqual(double_metaphone('Niles', 4), ('NLS', '')) |
||
3023 | self.assertEqual(double_metaphone('Noble', 4), ('NPL', '')) |
||
3024 | self.assertEqual(double_metaphone('Noel', 4), ('NL', '')) |
||
3025 | self.assertEqual(double_metaphone('Northend', 4), ('NR0N', 'NRTN')) |
||
3026 | self.assertEqual(double_metaphone('Norton', 4), ('NRTN', '')) |
||
3027 | self.assertEqual(double_metaphone('Nutter', 4), ('NTR', '')) |
||
3028 | self.assertEqual(double_metaphone('Odding', 4), ('ATNK', '')) |
||
3029 | self.assertEqual(double_metaphone('Odenbaugh', 4), ('ATNP', '')) |
||
3030 | self.assertEqual(double_metaphone('Ogborn', 4), ('AKPR', '')) |
||
3031 | self.assertEqual(double_metaphone('Oppenheimer', 4), ('APNM', '')) |
||
3032 | self.assertEqual(double_metaphone('Otis', 4), ('ATS', '')) |
||
3033 | self.assertEqual(double_metaphone('Oviatt', 4), ('AFT', '')) |
||
3034 | self.assertEqual(double_metaphone('PRUST?', 4), ('PRST', '')) |
||
3035 | self.assertEqual(double_metaphone('Paddock', 4), ('PTK', '')) |
||
3036 | self.assertEqual(double_metaphone('Page', 4), ('PJ', 'PK')) |
||
3037 | self.assertEqual(double_metaphone('Paine', 4), ('PN', '')) |
||
3038 | self.assertEqual(double_metaphone('Paist', 4), ('PST', '')) |
||
3039 | self.assertEqual(double_metaphone('Palmer', 4), ('PLMR', '')) |
||
3040 | self.assertEqual(double_metaphone('Park', 4), ('PRK', '')) |
||
3041 | self.assertEqual(double_metaphone('Parker', 4), ('PRKR', '')) |
||
3042 | self.assertEqual(double_metaphone('Parkhurst', 4), ('PRKR', '')) |
||
3043 | self.assertEqual(double_metaphone('Parrat', 4), ('PRT', '')) |
||
3044 | self.assertEqual(double_metaphone('Parsons', 4), ('PRSN', '')) |
||
3045 | self.assertEqual(double_metaphone('Partridge', 4), ('PRTR', '')) |
||
3046 | self.assertEqual(double_metaphone('Pashley', 4), ('PXL', '')) |
||
3047 | self.assertEqual(double_metaphone('Pasley', 4), ('PSL', '')) |
||
3048 | self.assertEqual(double_metaphone('Patrick', 4), ('PTRK', '')) |
||
3049 | self.assertEqual(double_metaphone('Pattee', 4), ('PT', '')) |
||
3050 | self.assertEqual(double_metaphone('Patten', 4), ('PTN', '')) |
||
3051 | self.assertEqual(double_metaphone('Pawley', 4), ('PL', '')) |
||
3052 | self.assertEqual(double_metaphone('Payne', 4), ('PN', '')) |
||
3053 | self.assertEqual(double_metaphone('Peabody', 4), ('PPT', '')) |
||
3054 | self.assertEqual(double_metaphone('Peake', 4), ('PK', '')) |
||
3055 | self.assertEqual(double_metaphone('Pearson', 4), ('PRSN', '')) |
||
3056 | self.assertEqual(double_metaphone('Peat', 4), ('PT', '')) |
||
3057 | self.assertEqual(double_metaphone('Pedersen', 4), ('PTRS', '')) |
||
3058 | self.assertEqual(double_metaphone('Percy', 4), ('PRS', '')) |
||
3059 | self.assertEqual(double_metaphone('Perkins', 4), ('PRKN', '')) |
||
3060 | self.assertEqual(double_metaphone('Perrine', 4), ('PRN', '')) |
||
3061 | self.assertEqual(double_metaphone('Perry', 4), ('PR', '')) |
||
3062 | self.assertEqual(double_metaphone('Peson', 4), ('PSN', '')) |
||
3063 | self.assertEqual(double_metaphone('Peterson', 4), ('PTRS', '')) |
||
3064 | self.assertEqual(double_metaphone('Peyton', 4), ('PTN', '')) |
||
3065 | self.assertEqual(double_metaphone('Phinney', 4), ('FN', '')) |
||
3066 | self.assertEqual(double_metaphone('Pickard', 4), ('PKRT', '')) |
||
3067 | self.assertEqual(double_metaphone('Pierce', 4), ('PRS', '')) |
||
3068 | self.assertEqual(double_metaphone('Pierrepont', 4), ('PRPN', '')) |
||
3069 | self.assertEqual(double_metaphone('Pike', 4), ('PK', '')) |
||
3070 | self.assertEqual(double_metaphone('Pinkham', 4), ('PNKM', '')) |
||
3071 | self.assertEqual(double_metaphone('Pitman', 4), ('PTMN', '')) |
||
3072 | self.assertEqual(double_metaphone('Pitt', 4), ('PT', '')) |
||
3073 | self.assertEqual(double_metaphone('Pitts', 4), ('PTS', '')) |
||
3074 | self.assertEqual(double_metaphone('Plantagenet', 4), ('PLNT', '')) |
||
3075 | self.assertEqual(double_metaphone('Platt', 4), ('PLT', '')) |
||
3076 | self.assertEqual(double_metaphone('Platts', 4), ('PLTS', '')) |
||
3077 | self.assertEqual(double_metaphone('Pleis', 4), ('PLS', '')) |
||
3078 | self.assertEqual(double_metaphone('Pleiss', 4), ('PLS', '')) |
||
3079 | self.assertEqual(double_metaphone('Plisko', 4), ('PLSK', '')) |
||
3080 | self.assertEqual(double_metaphone('Pliskovitch', 4), ('PLSK', '')) |
||
3081 | self.assertEqual(double_metaphone('Plum', 4), ('PLM', '')) |
||
3082 | self.assertEqual(double_metaphone('Plume', 4), ('PLM', '')) |
||
3083 | self.assertEqual(double_metaphone('Poitou', 4), ('PT', '')) |
||
3084 | self.assertEqual(double_metaphone('Pomeroy', 4), ('PMR', '')) |
||
3085 | self.assertEqual(double_metaphone('Poretiers', 4), ('PRTR', '')) |
||
3086 | self.assertEqual(double_metaphone('Pote', 4), ('PT', '')) |
||
3087 | self.assertEqual(double_metaphone('Potter', 4), ('PTR', '')) |
||
3088 | self.assertEqual(double_metaphone('Potts', 4), ('PTS', '')) |
||
3089 | self.assertEqual(double_metaphone('Powell', 4), ('PL', '')) |
||
3090 | self.assertEqual(double_metaphone('Pratt', 4), ('PRT', '')) |
||
3091 | self.assertEqual(double_metaphone('Presbury', 4), ('PRSP', '')) |
||
3092 | self.assertEqual(double_metaphone('Priest', 4), ('PRST', '')) |
||
3093 | self.assertEqual(double_metaphone('Prindle', 4), ('PRNT', '')) |
||
3094 | self.assertEqual(double_metaphone('Prior', 4), ('PRR', '')) |
||
3095 | self.assertEqual(double_metaphone('Profumo', 4), ('PRFM', '')) |
||
3096 | self.assertEqual(double_metaphone('Purdy', 4), ('PRT', '')) |
||
3097 | self.assertEqual(double_metaphone('Purefoy', 4), ('PRF', '')) |
||
3098 | self.assertEqual(double_metaphone('Pury', 4), ('PR', '')) |
||
3099 | self.assertEqual(double_metaphone('Quinter', 4), ('KNTR', '')) |
||
3100 | self.assertEqual(double_metaphone('Rachel', 4), ('RXL', 'RKL')) |
||
3101 | self.assertEqual(double_metaphone('Rand', 4), ('RNT', '')) |
||
3102 | self.assertEqual(double_metaphone('Rankin', 4), ('RNKN', '')) |
||
3103 | self.assertEqual(double_metaphone('Ravenscroft', 4), ('RFNS', '')) |
||
3104 | self.assertEqual(double_metaphone('Raynsford', 4), ('RNSF', '')) |
||
3105 | self.assertEqual(double_metaphone('Reakirt', 4), ('RKRT', '')) |
||
3106 | self.assertEqual(double_metaphone('Reaves', 4), ('RFS', '')) |
||
3107 | self.assertEqual(double_metaphone('Reeves', 4), ('RFS', '')) |
||
3108 | self.assertEqual(double_metaphone('Reichert', 4), ('RXRT', 'RKRT')) |
||
3109 | self.assertEqual(double_metaphone('Remmele', 4), ('RML', '')) |
||
3110 | self.assertEqual(double_metaphone('Reynolds', 4), ('RNLT', '')) |
||
3111 | self.assertEqual(double_metaphone('Rhodes', 4), ('RTS', '')) |
||
3112 | self.assertEqual(double_metaphone('Richards', 4), ('RXRT', 'RKRT')) |
||
3113 | self.assertEqual(double_metaphone('Richardson', 4), ('RXRT', 'RKRT')) |
||
3114 | self.assertEqual(double_metaphone('Ring', 4), ('RNK', '')) |
||
3115 | self.assertEqual(double_metaphone('Roberts', 4), ('RPRT', '')) |
||
3116 | self.assertEqual(double_metaphone('Robertson', 4), ('RPRT', '')) |
||
3117 | self.assertEqual(double_metaphone('Robson', 4), ('RPSN', '')) |
||
3118 | self.assertEqual(double_metaphone('Rodie', 4), ('RT', '')) |
||
3119 | self.assertEqual(double_metaphone('Rody', 4), ('RT', '')) |
||
3120 | self.assertEqual(double_metaphone('Rogers', 4), ('RKRS', 'RJRS')) |
||
3121 | self.assertEqual(double_metaphone('Ross', 4), ('RS', '')) |
||
3122 | self.assertEqual(double_metaphone('Rosslevin', 4), ('RSLF', '')) |
||
3123 | self.assertEqual(double_metaphone('Rowland', 4), ('RLNT', '')) |
||
3124 | self.assertEqual(double_metaphone('Ruehl', 4), ('RL', '')) |
||
3125 | self.assertEqual(double_metaphone('Russell', 4), ('RSL', '')) |
||
3126 | self.assertEqual(double_metaphone('Ruth', 4), ('R0', 'RT')) |
||
3127 | self.assertEqual(double_metaphone('Ryan', 4), ('RN', '')) |
||
3128 | self.assertEqual(double_metaphone('Rysse', 4), ('RS', '')) |
||
3129 | self.assertEqual(double_metaphone('Sadler', 4), ('STLR', '')) |
||
3130 | self.assertEqual(double_metaphone('Salmon', 4), ('SLMN', '')) |
||
3131 | self.assertEqual(double_metaphone('Salter', 4), ('SLTR', '')) |
||
3132 | self.assertEqual(double_metaphone('Salvatore', 4), ('SLFT', '')) |
||
3133 | self.assertEqual(double_metaphone('Sanders', 4), ('SNTR', '')) |
||
3134 | self.assertEqual(double_metaphone('Sands', 4), ('SNTS', '')) |
||
3135 | self.assertEqual(double_metaphone('Sanford', 4), ('SNFR', '')) |
||
3136 | self.assertEqual(double_metaphone('Sanger', 4), ('SNKR', 'SNJR')) |
||
3137 | self.assertEqual(double_metaphone('Sargent', 4), ('SRJN', 'SRKN')) |
||
3138 | self.assertEqual(double_metaphone('Saunders', 4), ('SNTR', '')) |
||
3139 | self.assertEqual(double_metaphone('Schilling', 4), ('XLNK', '')) |
||
3140 | self.assertEqual(double_metaphone('Schlegel', 4), ('XLKL', 'SLKL')) |
||
3141 | self.assertEqual(double_metaphone('Scott', 4), ('SKT', '')) |
||
3142 | self.assertEqual(double_metaphone('Sears', 4), ('SRS', '')) |
||
3143 | self.assertEqual(double_metaphone('Segersall', 4), ('SJRS', 'SKRS')) |
||
3144 | self.assertEqual(double_metaphone('Senecal', 4), ('SNKL', '')) |
||
3145 | self.assertEqual(double_metaphone('Sergeaux', 4), ('SRJ', 'SRK')) |
||
3146 | self.assertEqual(double_metaphone('Severance', 4), ('SFRN', '')) |
||
3147 | self.assertEqual(double_metaphone('Sharp', 4), ('XRP', '')) |
||
3148 | self.assertEqual(double_metaphone('Sharpe', 4), ('XRP', '')) |
||
3149 | self.assertEqual(double_metaphone('Sharply', 4), ('XRPL', '')) |
||
3150 | self.assertEqual(double_metaphone('Shatswell', 4), ('XTSL', '')) |
||
3151 | self.assertEqual(double_metaphone('Shattack', 4), ('XTK', '')) |
||
3152 | self.assertEqual(double_metaphone('Shattock', 4), ('XTK', '')) |
||
3153 | self.assertEqual(double_metaphone('Shattuck', 4), ('XTK', '')) |
||
3154 | self.assertEqual(double_metaphone('Shaw', 4), ('X', 'XF')) |
||
3155 | self.assertEqual(double_metaphone('Sheldon', 4), ('XLTN', '')) |
||
3156 | self.assertEqual(double_metaphone('Sherman', 4), ('XRMN', '')) |
||
3157 | self.assertEqual(double_metaphone('Shinn', 4), ('XN', '')) |
||
3158 | self.assertEqual(double_metaphone('Shirford', 4), ('XRFR', '')) |
||
3159 | self.assertEqual(double_metaphone('Shirley', 4), ('XRL', '')) |
||
3160 | self.assertEqual(double_metaphone('Shively', 4), ('XFL', '')) |
||
3161 | self.assertEqual(double_metaphone('Shoemaker', 4), ('XMKR', '')) |
||
3162 | self.assertEqual(double_metaphone('Short', 4), ('XRT', '')) |
||
3163 | self.assertEqual(double_metaphone('Shotwell', 4), ('XTL', '')) |
||
3164 | self.assertEqual(double_metaphone('Shute', 4), ('XT', '')) |
||
3165 | self.assertEqual(double_metaphone('Sibley', 4), ('SPL', '')) |
||
3166 | self.assertEqual(double_metaphone('Silver', 4), ('SLFR', '')) |
||
3167 | self.assertEqual(double_metaphone('Simes', 4), ('SMS', '')) |
||
3168 | self.assertEqual(double_metaphone('Sinken', 4), ('SNKN', '')) |
||
3169 | self.assertEqual(double_metaphone('Sinn', 4), ('SN', '')) |
||
3170 | self.assertEqual(double_metaphone('Skelton', 4), ('SKLT', '')) |
||
3171 | self.assertEqual(double_metaphone('Skiffe', 4), ('SKF', '')) |
||
3172 | self.assertEqual(double_metaphone('Skotkonung', 4), ('SKTK', '')) |
||
3173 | self.assertEqual(double_metaphone('Slade', 4), ('SLT', 'XLT')) |
||
3174 | self.assertEqual(double_metaphone('Slye', 4), ('SL', 'XL')) |
||
3175 | self.assertEqual(double_metaphone('Smedley', 4), ('SMTL', 'XMTL')) |
||
3176 | self.assertEqual(double_metaphone('Smith', 4), ('SM0', 'XMT')) |
||
3177 | self.assertEqual(double_metaphone('Snow', 4), ('SN', 'XNF')) |
||
3178 | self.assertEqual(double_metaphone('Soole', 4), ('SL', '')) |
||
3179 | self.assertEqual(double_metaphone('Soule', 4), ('SL', '')) |
||
3180 | self.assertEqual(double_metaphone('Southworth', 4), ('S0R0', 'STRT')) |
||
3181 | self.assertEqual(double_metaphone('Sowles', 4), ('SLS', '')) |
||
3182 | self.assertEqual(double_metaphone('Spalding', 4), ('SPLT', '')) |
||
3183 | self.assertEqual(double_metaphone('Spark', 4), ('SPRK', '')) |
||
3184 | self.assertEqual(double_metaphone('Spencer', 4), ('SPNS', '')) |
||
3185 | self.assertEqual(double_metaphone('Sperry', 4), ('SPR', '')) |
||
3186 | self.assertEqual(double_metaphone('Spofford', 4), ('SPFR', '')) |
||
3187 | self.assertEqual(double_metaphone('Spooner', 4), ('SPNR', '')) |
||
3188 | self.assertEqual(double_metaphone('Sprague', 4), ('SPRK', '')) |
||
3189 | self.assertEqual(double_metaphone('Springer', 4), ('SPRN', '')) |
||
3190 | self.assertEqual(double_metaphone('St. Clair', 4), ('STKL', '')) |
||
3191 | self.assertEqual(double_metaphone('St. Claire', 4), ('STKL', '')) |
||
3192 | self.assertEqual(double_metaphone('St. Leger', 4), ('STLJ', 'STLK')) |
||
3193 | self.assertEqual(double_metaphone('St. Omer', 4), ('STMR', '')) |
||
3194 | self.assertEqual(double_metaphone('Stafferton', 4), ('STFR', '')) |
||
3195 | self.assertEqual(double_metaphone('Stafford', 4), ('STFR', '')) |
||
3196 | self.assertEqual(double_metaphone('Stalham', 4), ('STLM', '')) |
||
3197 | self.assertEqual(double_metaphone('Stanford', 4), ('STNF', '')) |
||
3198 | self.assertEqual(double_metaphone('Stanton', 4), ('STNT', '')) |
||
3199 | self.assertEqual(double_metaphone('Star', 4), ('STR', '')) |
||
3200 | self.assertEqual(double_metaphone('Starbuck', 4), ('STRP', '')) |
||
3201 | self.assertEqual(double_metaphone('Starkey', 4), ('STRK', '')) |
||
3202 | self.assertEqual(double_metaphone('Starkweather', 4), ('STRK', '')) |
||
3203 | self.assertEqual(double_metaphone('Stearns', 4), ('STRN', '')) |
||
3204 | self.assertEqual(double_metaphone('Stebbins', 4), ('STPN', '')) |
||
3205 | self.assertEqual(double_metaphone('Steele', 4), ('STL', '')) |
||
3206 | self.assertEqual(double_metaphone('Stephenson', 4), ('STFN', '')) |
||
3207 | self.assertEqual(double_metaphone('Stevens', 4), ('STFN', '')) |
||
3208 | self.assertEqual(double_metaphone('Stoddard', 4), ('STTR', '')) |
||
3209 | self.assertEqual(double_metaphone('Stodder', 4), ('STTR', '')) |
||
3210 | self.assertEqual(double_metaphone('Stone', 4), ('STN', '')) |
||
3211 | self.assertEqual(double_metaphone('Storey', 4), ('STR', '')) |
||
3212 | self.assertEqual(double_metaphone('Storrada', 4), ('STRT', '')) |
||
3213 | self.assertEqual(double_metaphone('Story', 4), ('STR', '')) |
||
3214 | self.assertEqual(double_metaphone('Stoughton', 4), ('STFT', '')) |
||
3215 | self.assertEqual(double_metaphone('Stout', 4), ('STT', '')) |
||
3216 | self.assertEqual(double_metaphone('Stow', 4), ('ST', 'STF')) |
||
3217 | self.assertEqual(double_metaphone('Strong', 4), ('STRN', '')) |
||
3218 | self.assertEqual(double_metaphone('Strutt', 4), ('STRT', '')) |
||
3219 | self.assertEqual(double_metaphone('Stryker', 4), ('STRK', '')) |
||
3220 | self.assertEqual(double_metaphone('Stuckeley', 4), ('STKL', '')) |
||
3221 | self.assertEqual(double_metaphone('Sturges', 4), ('STRJ', 'STRK')) |
||
3222 | self.assertEqual(double_metaphone('Sturgess', 4), ('STRJ', 'STRK')) |
||
3223 | self.assertEqual(double_metaphone('Sturgis', 4), ('STRJ', 'STRK')) |
||
3224 | self.assertEqual(double_metaphone('Suevain', 4), ('SFN', '')) |
||
3225 | self.assertEqual(double_metaphone('Sulyard', 4), ('SLRT', '')) |
||
3226 | self.assertEqual(double_metaphone('Sutton', 4), ('STN', '')) |
||
3227 | self.assertEqual(double_metaphone('Swain', 4), ('SN', 'XN')) |
||
3228 | self.assertEqual(double_metaphone('Swayne', 4), ('SN', 'XN')) |
||
3229 | self.assertEqual(double_metaphone('Swayze', 4), ('SS', 'XTS')) |
||
3230 | self.assertEqual(double_metaphone('Swift', 4), ('SFT', 'XFT')) |
||
3231 | self.assertEqual(double_metaphone('Taber', 4), ('TPR', '')) |
||
3232 | self.assertEqual(double_metaphone('Talcott', 4), ('TLKT', '')) |
||
3233 | self.assertEqual(double_metaphone('Tarne', 4), ('TRN', '')) |
||
3234 | self.assertEqual(double_metaphone('Tatum', 4), ('TTM', '')) |
||
3235 | self.assertEqual(double_metaphone('Taverner', 4), ('TFRN', '')) |
||
3236 | self.assertEqual(double_metaphone('Taylor', 4), ('TLR', '')) |
||
3237 | self.assertEqual(double_metaphone('Tenney', 4), ('TN', '')) |
||
3238 | self.assertEqual(double_metaphone('Thayer', 4), ('0R', 'TR')) |
||
3239 | self.assertEqual(double_metaphone('Thember', 4), ('0MPR', 'TMPR')) |
||
3240 | self.assertEqual(double_metaphone('Thomas', 4), ('TMS', '')) |
||
3241 | self.assertEqual(double_metaphone('Thompson', 4), ('TMPS', '')) |
||
3242 | self.assertEqual(double_metaphone('Thorne', 4), ('0RN', 'TRN')) |
||
3243 | self.assertEqual(double_metaphone('Thornycraft', 4), ('0RNK', 'TRNK')) |
||
3244 | self.assertEqual(double_metaphone('Threlkeld', 4), ('0RLK', 'TRLK')) |
||
3245 | self.assertEqual(double_metaphone('Throckmorton', 4), ('0RKM', 'TRKM')) |
||
3246 | self.assertEqual(double_metaphone('Thwaits', 4), ('0TS', 'TTS')) |
||
3247 | self.assertEqual(double_metaphone('Tibbetts', 4), ('TPTS', '')) |
||
3248 | self.assertEqual(double_metaphone('Tidd', 4), ('TT', '')) |
||
3249 | self.assertEqual(double_metaphone('Tierney', 4), ('TRN', '')) |
||
3250 | self.assertEqual(double_metaphone('Tilley', 4), ('TL', '')) |
||
3251 | self.assertEqual(double_metaphone('Tillieres', 4), ('TLRS', '')) |
||
3252 | self.assertEqual(double_metaphone('Tilly', 4), ('TL', '')) |
||
3253 | self.assertEqual(double_metaphone('Tisdale', 4), ('TSTL', '')) |
||
3254 | self.assertEqual(double_metaphone('Titus', 4), ('TTS', '')) |
||
3255 | self.assertEqual(double_metaphone('Tobey', 4), ('TP', '')) |
||
3256 | self.assertEqual(double_metaphone('Tooker', 4), ('TKR', '')) |
||
3257 | self.assertEqual(double_metaphone('Towle', 4), ('TL', '')) |
||
3258 | self.assertEqual(double_metaphone('Towne', 4), ('TN', '')) |
||
3259 | self.assertEqual(double_metaphone('Townsend', 4), ('TNSN', '')) |
||
3260 | self.assertEqual(double_metaphone('Treadway', 4), ('TRT', '')) |
||
3261 | self.assertEqual(double_metaphone('Trelawney', 4), ('TRLN', '')) |
||
3262 | self.assertEqual(double_metaphone('Trinder', 4), ('TRNT', '')) |
||
3263 | self.assertEqual(double_metaphone('Tripp', 4), ('TRP', '')) |
||
3264 | self.assertEqual(double_metaphone('Trippe', 4), ('TRP', '')) |
||
3265 | self.assertEqual(double_metaphone('Trott', 4), ('TRT', '')) |
||
3266 | self.assertEqual(double_metaphone('True', 4), ('TR', '')) |
||
3267 | self.assertEqual(double_metaphone('Trussebut', 4), ('TRSP', '')) |
||
3268 | self.assertEqual(double_metaphone('Tucker', 4), ('TKR', '')) |
||
3269 | self.assertEqual(double_metaphone('Turgeon', 4), ('TRJN', 'TRKN')) |
||
3270 | self.assertEqual(double_metaphone('Turner', 4), ('TRNR', '')) |
||
3271 | self.assertEqual(double_metaphone('Tuttle', 4), ('TTL', '')) |
||
3272 | self.assertEqual(double_metaphone('Tyler', 4), ('TLR', '')) |
||
3273 | self.assertEqual(double_metaphone('Tylle', 4), ('TL', '')) |
||
3274 | self.assertEqual(double_metaphone('Tyrrel', 4), ('TRL', '')) |
||
3275 | self.assertEqual(double_metaphone('Ua Tuathail', 4), ('AT0L', 'ATTL')) |
||
3276 | self.assertEqual(double_metaphone('Ulrich', 4), ('ALRX', 'ALRK')) |
||
3277 | self.assertEqual(double_metaphone('Underhill', 4), ('ANTR', '')) |
||
3278 | self.assertEqual(double_metaphone('Underwood', 4), ('ANTR', '')) |
||
3279 | self.assertEqual(double_metaphone('Unknown', 4), ('ANKN', '')) |
||
3280 | self.assertEqual(double_metaphone('Valentine', 4), ('FLNT', '')) |
||
3281 | self.assertEqual(double_metaphone('Van Egmond', 4), ('FNKM', '')) |
||
3282 | self.assertEqual(double_metaphone('Van der Beek', 4), ('FNTR', '')) |
||
3283 | self.assertEqual(double_metaphone('Vaughan', 4), ('FKN', '')) |
||
3284 | self.assertEqual(double_metaphone('Vermenlen', 4), ('FRMN', '')) |
||
3285 | self.assertEqual(double_metaphone('Vincent', 4), ('FNSN', '')) |
||
3286 | self.assertEqual(double_metaphone('Volentine', 4), ('FLNT', '')) |
||
3287 | self.assertEqual(double_metaphone('Wagner', 4), ('AKNR', 'FKNR')) |
||
3288 | self.assertEqual(double_metaphone('Waite', 4), ('AT', 'FT')) |
||
3289 | self.assertEqual(double_metaphone('Walker', 4), ('ALKR', 'FLKR')) |
||
3290 | self.assertEqual(double_metaphone('Walter', 4), ('ALTR', 'FLTR')) |
||
3291 | self.assertEqual(double_metaphone('Wandell', 4), ('ANTL', 'FNTL')) |
||
3292 | self.assertEqual(double_metaphone('Wandesford', 4), ('ANTS', 'FNTS')) |
||
3293 | self.assertEqual(double_metaphone('Warbleton', 4), ('ARPL', 'FRPL')) |
||
3294 | self.assertEqual(double_metaphone('Ward', 4), ('ART', 'FRT')) |
||
3295 | self.assertEqual(double_metaphone('Warde', 4), ('ART', 'FRT')) |
||
3296 | self.assertEqual(double_metaphone('Ware', 4), ('AR', 'FR')) |
||
3297 | self.assertEqual(double_metaphone('Wareham', 4), ('ARHM', 'FRHM')) |
||
3298 | self.assertEqual(double_metaphone('Warner', 4), ('ARNR', 'FRNR')) |
||
3299 | self.assertEqual(double_metaphone('Warren', 4), ('ARN', 'FRN')) |
||
3300 | self.assertEqual(double_metaphone('Washburne', 4), ('AXPR', 'FXPR')) |
||
3301 | self.assertEqual(double_metaphone('Waterbury', 4), ('ATRP', 'FTRP')) |
||
3302 | self.assertEqual(double_metaphone('Watson', 4), ('ATSN', 'FTSN')) |
||
3303 | self.assertEqual(double_metaphone('WatsonEllithorpe', 4), |
||
3304 | ('ATSN', 'FTSN')) |
||
3305 | self.assertEqual(double_metaphone('Watts', 4), ('ATS', 'FTS')) |
||
3306 | self.assertEqual(double_metaphone('Wayne', 4), ('AN', 'FN')) |
||
3307 | self.assertEqual(double_metaphone('Webb', 4), ('AP', 'FP')) |
||
3308 | self.assertEqual(double_metaphone('Weber', 4), ('APR', 'FPR')) |
||
3309 | self.assertEqual(double_metaphone('Webster', 4), ('APST', 'FPST')) |
||
3310 | self.assertEqual(double_metaphone('Weed', 4), ('AT', 'FT')) |
||
3311 | self.assertEqual(double_metaphone('Weeks', 4), ('AKS', 'FKS')) |
||
3312 | self.assertEqual(double_metaphone('Wells', 4), ('ALS', 'FLS')) |
||
3313 | self.assertEqual(double_metaphone('Wenzell', 4), ('ANSL', 'FNTS')) |
||
3314 | self.assertEqual(double_metaphone('West', 4), ('AST', 'FST')) |
||
3315 | self.assertEqual(double_metaphone('Westbury', 4), ('ASTP', 'FSTP')) |
||
3316 | self.assertEqual(double_metaphone('Whatlocke', 4), ('ATLK', '')) |
||
3317 | self.assertEqual(double_metaphone('Wheeler', 4), ('ALR', '')) |
||
3318 | self.assertEqual(double_metaphone('Whiston', 4), ('ASTN', '')) |
||
3319 | self.assertEqual(double_metaphone('White', 4), ('AT', '')) |
||
3320 | self.assertEqual(double_metaphone('Whitman', 4), ('ATMN', '')) |
||
3321 | self.assertEqual(double_metaphone('Whiton', 4), ('ATN', '')) |
||
3322 | self.assertEqual(double_metaphone('Whitson', 4), ('ATSN', '')) |
||
3323 | self.assertEqual(double_metaphone('Wickes', 4), ('AKS', 'FKS')) |
||
3324 | self.assertEqual(double_metaphone('Wilbur', 4), ('ALPR', 'FLPR')) |
||
3325 | self.assertEqual(double_metaphone('Wilcotes', 4), ('ALKT', 'FLKT')) |
||
3326 | self.assertEqual(double_metaphone('Wilkinson', 4), ('ALKN', 'FLKN')) |
||
3327 | self.assertEqual(double_metaphone('Willets', 4), ('ALTS', 'FLTS')) |
||
3328 | self.assertEqual(double_metaphone('Willett', 4), ('ALT', 'FLT')) |
||
3329 | self.assertEqual(double_metaphone('Willey', 4), ('AL', 'FL')) |
||
3330 | self.assertEqual(double_metaphone('Williams', 4), ('ALMS', 'FLMS')) |
||
3331 | self.assertEqual(double_metaphone('Williston', 4), ('ALST', 'FLST')) |
||
3332 | self.assertEqual(double_metaphone('Wilson', 4), ('ALSN', 'FLSN')) |
||
3333 | self.assertEqual(double_metaphone('Wimes', 4), ('AMS', 'FMS')) |
||
3334 | self.assertEqual(double_metaphone('Winch', 4), ('ANX', 'FNK')) |
||
3335 | self.assertEqual(double_metaphone('Winegar', 4), ('ANKR', 'FNKR')) |
||
3336 | self.assertEqual(double_metaphone('Wing', 4), ('ANK', 'FNK')) |
||
3337 | self.assertEqual(double_metaphone('Winsley', 4), ('ANSL', 'FNSL')) |
||
3338 | self.assertEqual(double_metaphone('Winslow', 4), ('ANSL', 'FNSL')) |
||
3339 | self.assertEqual(double_metaphone('Winthrop', 4), ('AN0R', 'FNTR')) |
||
3340 | self.assertEqual(double_metaphone('Wise', 4), ('AS', 'FS')) |
||
3341 | self.assertEqual(double_metaphone('Wood', 4), ('AT', 'FT')) |
||
3342 | self.assertEqual(double_metaphone('Woodbridge', 4), ('ATPR', 'FTPR')) |
||
3343 | self.assertEqual(double_metaphone('Woodward', 4), ('ATRT', 'FTRT')) |
||
3344 | self.assertEqual(double_metaphone('Wooley', 4), ('AL', 'FL')) |
||
3345 | self.assertEqual(double_metaphone('Woolley', 4), ('AL', 'FL')) |
||
3346 | self.assertEqual(double_metaphone('Worth', 4), ('AR0', 'FRT')) |
||
3347 | self.assertEqual(double_metaphone('Worthen', 4), ('AR0N', 'FRTN')) |
||
3348 | self.assertEqual(double_metaphone('Worthley', 4), ('AR0L', 'FRTL')) |
||
3349 | self.assertEqual(double_metaphone('Wright', 4), ('RT', '')) |
||
3350 | self.assertEqual(double_metaphone('Wyer', 4), ('AR', 'FR')) |
||
3351 | self.assertEqual(double_metaphone('Wyere', 4), ('AR', 'FR')) |
||
3352 | self.assertEqual(double_metaphone('Wynkoop', 4), ('ANKP', 'FNKP')) |
||
3353 | self.assertEqual(double_metaphone('Yarnall', 4), ('ARNL', '')) |
||
3354 | self.assertEqual(double_metaphone('Yeoman', 4), ('AMN', '')) |
||
3355 | self.assertEqual(double_metaphone('Yorke', 4), ('ARK', '')) |
||
3356 | self.assertEqual(double_metaphone('Young', 4), ('ANK', '')) |
||
3357 | self.assertEqual(double_metaphone('ab Wennonwen', 4), ('APNN', '')) |
||
3358 | self.assertEqual(double_metaphone('ap Llewellyn', 4), ('APLL', '')) |
||
3359 | self.assertEqual(double_metaphone('ap Lorwerth', 4), ('APLR', '')) |
||
3360 | self.assertEqual(double_metaphone('d\'Angouleme', 4), ('TNKL', '')) |
||
3361 | self.assertEqual(double_metaphone('de Audeham', 4), ('TTHM', '')) |
||
3362 | self.assertEqual(double_metaphone('de Bavant', 4), ('TPFN', '')) |
||
3363 | self.assertEqual(double_metaphone('de Beauchamp', 4), ('TPXM', 'TPKM')) |
||
3364 | self.assertEqual(double_metaphone('de Beaumont', 4), ('TPMN', '')) |
||
3365 | self.assertEqual(double_metaphone('de Bolbec', 4), ('TPLP', '')) |
||
3366 | self.assertEqual(double_metaphone('de Braiose', 4), ('TPRS', '')) |
||
3367 | self.assertEqual(double_metaphone('de Braose', 4), ('TPRS', '')) |
||
3368 | self.assertEqual(double_metaphone('de Briwere', 4), ('TPRR', '')) |
||
3369 | self.assertEqual(double_metaphone('de Cantelou', 4), ('TKNT', '')) |
||
3370 | self.assertEqual(double_metaphone('de Cherelton', 4), ('TXRL', 'TKRL')) |
||
3371 | self.assertEqual(double_metaphone('de Cherleton', 4), ('TXRL', 'TKRL')) |
||
3372 | self.assertEqual(double_metaphone('de Clare', 4), ('TKLR', '')) |
||
3373 | self.assertEqual(double_metaphone('de Claremont', 4), ('TKLR', '')) |
||
3374 | self.assertEqual(double_metaphone('de Clifford', 4), ('TKLF', '')) |
||
3375 | self.assertEqual(double_metaphone('de Colville', 4), ('TKLF', '')) |
||
3376 | self.assertEqual(double_metaphone('de Courtenay', 4), ('TKRT', '')) |
||
3377 | self.assertEqual(double_metaphone('de Fauconberg', 4), ('TFKN', '')) |
||
3378 | self.assertEqual(double_metaphone('de Forest', 4), ('TFRS', '')) |
||
3379 | self.assertEqual(double_metaphone('de Gai', 4), ('TK', '')) |
||
3380 | self.assertEqual(double_metaphone('de Grey', 4), ('TKR', '')) |
||
3381 | self.assertEqual(double_metaphone('de Guernons', 4), ('TKRN', '')) |
||
3382 | self.assertEqual(double_metaphone('de Haia', 4), ('T', '')) |
||
3383 | self.assertEqual(double_metaphone('de Harcourt', 4), ('TRKR', '')) |
||
3384 | self.assertEqual(double_metaphone('de Hastings', 4), ('TSTN', '')) |
||
3385 | self.assertEqual(double_metaphone('de Hoke', 4), ('TK', '')) |
||
3386 | self.assertEqual(double_metaphone('de Hooch', 4), ('TK', '')) |
||
3387 | self.assertEqual(double_metaphone('de Hugelville', 4), |
||
3388 | ('TJLF', 'TKLF')) |
||
3389 | self.assertEqual(double_metaphone('de Huntingdon', 4), ('TNTN', '')) |
||
3390 | self.assertEqual(double_metaphone('de Insula', 4), ('TNSL', '')) |
||
3391 | self.assertEqual(double_metaphone('de Keynes', 4), ('TKNS', '')) |
||
3392 | self.assertEqual(double_metaphone('de Lacy', 4), ('TLS', '')) |
||
3393 | self.assertEqual(double_metaphone('de Lexington', 4), ('TLKS', '')) |
||
3394 | self.assertEqual(double_metaphone('de Lusignan', 4), ('TLSN', 'TLSK')) |
||
3395 | self.assertEqual(double_metaphone('de Manvers', 4), ('TMNF', '')) |
||
3396 | self.assertEqual(double_metaphone('de Montagu', 4), ('TMNT', '')) |
||
3397 | self.assertEqual(double_metaphone('de Montault', 4), ('TMNT', '')) |
||
3398 | self.assertEqual(double_metaphone('de Montfort', 4), ('TMNT', '')) |
||
3399 | self.assertEqual(double_metaphone('de Mortimer', 4), ('TMRT', '')) |
||
3400 | self.assertEqual(double_metaphone('de Morville', 4), ('TMRF', '')) |
||
3401 | self.assertEqual(double_metaphone('de Morvois', 4), ('TMRF', '')) |
||
3402 | self.assertEqual(double_metaphone('de Neufmarche', 4), ('TNFM', '')) |
||
3403 | self.assertEqual(double_metaphone('de Odingsells', 4), ('TTNK', '')) |
||
3404 | self.assertEqual(double_metaphone('de Odyngsells', 4), ('TTNK', '')) |
||
3405 | self.assertEqual(double_metaphone('de Percy', 4), ('TPRS', '')) |
||
3406 | self.assertEqual(double_metaphone('de Pierrepont', 4), ('TPRP', '')) |
||
3407 | self.assertEqual(double_metaphone('de Plessetis', 4), ('TPLS', '')) |
||
3408 | self.assertEqual(double_metaphone('de Porhoet', 4), ('TPRT', '')) |
||
3409 | self.assertEqual(double_metaphone('de Prouz', 4), ('TPRS', '')) |
||
3410 | self.assertEqual(double_metaphone('de Quincy', 4), ('TKNS', '')) |
||
3411 | self.assertEqual(double_metaphone('de Ripellis', 4), ('TRPL', '')) |
||
3412 | self.assertEqual(double_metaphone('de Ros', 4), ('TRS', '')) |
||
3413 | self.assertEqual(double_metaphone('de Salisbury', 4), ('TSLS', '')) |
||
3414 | self.assertEqual(double_metaphone('de Sanford', 4), ('TSNF', '')) |
||
3415 | self.assertEqual(double_metaphone('de Somery', 4), ('TSMR', '')) |
||
3416 | self.assertEqual(double_metaphone('de St. Hilary', 4), ('TSTL', '')) |
||
3417 | self.assertEqual(double_metaphone('de St. Liz', 4), ('TSTL', '')) |
||
3418 | self.assertEqual(double_metaphone('de Sutton', 4), ('TSTN', '')) |
||
3419 | self.assertEqual(double_metaphone('de Toeni', 4), ('TTN', '')) |
||
3420 | self.assertEqual(double_metaphone('de Tony', 4), ('TTN', '')) |
||
3421 | self.assertEqual(double_metaphone('de Umfreville', 4), ('TMFR', '')) |
||
3422 | self.assertEqual(double_metaphone('de Valognes', 4), ('TFLN', 'TFLK')) |
||
3423 | self.assertEqual(double_metaphone('de Vaux', 4), ('TF', '')) |
||
3424 | self.assertEqual(double_metaphone('de Vere', 4), ('TFR', '')) |
||
3425 | self.assertEqual(double_metaphone('de Vermandois', 4), ('TFRM', '')) |
||
3426 | self.assertEqual(double_metaphone('de Vernon', 4), ('TFRN', '')) |
||
3427 | self.assertEqual(double_metaphone('de Vexin', 4), ('TFKS', '')) |
||
3428 | self.assertEqual(double_metaphone('de Vitre', 4), ('TFTR', '')) |
||
3429 | self.assertEqual(double_metaphone('de Wandesford', 4), ('TNTS', '')) |
||
3430 | self.assertEqual(double_metaphone('de Warenne', 4), ('TRN', '')) |
||
3431 | self.assertEqual(double_metaphone('de Westbury', 4), ('TSTP', '')) |
||
3432 | self.assertEqual(double_metaphone('di Saluzzo', 4), ('TSLS', 'TSLT')) |
||
3433 | self.assertEqual(double_metaphone('fitz Alan', 4), ('FTSL', '')) |
||
3434 | self.assertEqual(double_metaphone('fitz Geoffrey', 4), |
||
3435 | ('FTSJ', 'FTSK')) |
||
3436 | self.assertEqual(double_metaphone('fitz Herbert', 4), ('FTSR', '')) |
||
3437 | self.assertEqual(double_metaphone('fitz John', 4), ('FTSJ', '')) |
||
3438 | self.assertEqual(double_metaphone('fitz Patrick', 4), ('FTSP', '')) |
||
3439 | self.assertEqual(double_metaphone('fitz Payn', 4), ('FTSP', '')) |
||
3440 | self.assertEqual(double_metaphone('fitz Piers', 4), ('FTSP', '')) |
||
3441 | self.assertEqual(double_metaphone('fitz Randolph', 4), ('FTSR', '')) |
||
3442 | self.assertEqual(double_metaphone('fitz Richard', 4), ('FTSR', '')) |
||
3443 | self.assertEqual(double_metaphone('fitz Robert', 4), ('FTSR', '')) |
||
3444 | self.assertEqual(double_metaphone('fitz Roy', 4), ('FTSR', '')) |
||
3445 | self.assertEqual(double_metaphone('fitz Scrob', 4), ('FTSS', '')) |
||
3446 | self.assertEqual(double_metaphone('fitz Walter', 4), ('FTSL', '')) |
||
3447 | self.assertEqual(double_metaphone('fitz Warin', 4), ('FTSR', '')) |
||
3448 | self.assertEqual(double_metaphone('fitz Williams', 4), ('FTSL', '')) |
||
3449 | self.assertEqual(double_metaphone('la Zouche', 4), ('LSX', 'LSK')) |
||
3450 | self.assertEqual(double_metaphone('le Botiller', 4), ('LPTL', '')) |
||
3451 | self.assertEqual(double_metaphone('le Despenser', 4), ('LTSP', '')) |
||
3452 | self.assertEqual(double_metaphone('le deSpencer', 4), ('LTSP', '')) |
||
3453 | self.assertEqual(double_metaphone('of Allendale', 4), ('AFLN', '')) |
||
3454 | self.assertEqual(double_metaphone('of Angouleme', 4), ('AFNK', '')) |
||
3455 | self.assertEqual(double_metaphone('of Anjou', 4), ('AFNJ', '')) |
||
3456 | self.assertEqual(double_metaphone('of Aquitaine', 4), ('AFKT', '')) |
||
3457 | self.assertEqual(double_metaphone('of Aumale', 4), ('AFML', '')) |
||
3458 | self.assertEqual(double_metaphone('of Bavaria', 4), ('AFPF', '')) |
||
3459 | self.assertEqual(double_metaphone('of Boulogne', 4), ('AFPL', '')) |
||
3460 | self.assertEqual(double_metaphone('of Brittany', 4), ('AFPR', '')) |
||
3461 | self.assertEqual(double_metaphone('of Brittary', 4), ('AFPR', '')) |
||
3462 | self.assertEqual(double_metaphone('of Castile', 4), ('AFKS', '')) |
||
3463 | self.assertEqual(double_metaphone('of Chester', 4), ('AFXS', 'AFKS')) |
||
3464 | self.assertEqual(double_metaphone('of Clermont', 4), ('AFKL', '')) |
||
3465 | self.assertEqual(double_metaphone('of Cologne', 4), ('AFKL', '')) |
||
3466 | self.assertEqual(double_metaphone('of Dinan', 4), ('AFTN', '')) |
||
3467 | self.assertEqual(double_metaphone('of Dunbar', 4), ('AFTN', '')) |
||
3468 | self.assertEqual(double_metaphone('of England', 4), ('AFNK', '')) |
||
3469 | self.assertEqual(double_metaphone('of Essex', 4), ('AFSK', '')) |
||
3470 | self.assertEqual(double_metaphone('of Falaise', 4), ('AFFL', '')) |
||
3471 | self.assertEqual(double_metaphone('of Flanders', 4), ('AFFL', '')) |
||
3472 | self.assertEqual(double_metaphone('of Galloway', 4), ('AFKL', '')) |
||
3473 | self.assertEqual(double_metaphone('of Germany', 4), ('AFKR', 'AFJR')) |
||
3474 | self.assertEqual(double_metaphone('of Gloucester', 4), ('AFKL', '')) |
||
3475 | self.assertEqual(double_metaphone('of Heristal', 4), ('AFRS', '')) |
||
3476 | self.assertEqual(double_metaphone('of Hungary', 4), ('AFNK', '')) |
||
3477 | self.assertEqual(double_metaphone('of Huntington', 4), ('AFNT', '')) |
||
3478 | self.assertEqual(double_metaphone('of Kiev', 4), ('AFKF', '')) |
||
3479 | self.assertEqual(double_metaphone('of Kuno', 4), ('AFKN', '')) |
||
3480 | self.assertEqual(double_metaphone('of Landen', 4), ('AFLN', '')) |
||
3481 | self.assertEqual(double_metaphone('of Laon', 4), ('AFLN', '')) |
||
3482 | self.assertEqual(double_metaphone('of Leinster', 4), ('AFLN', '')) |
||
3483 | self.assertEqual(double_metaphone('of Lens', 4), ('AFLN', '')) |
||
3484 | self.assertEqual(double_metaphone('of Lorraine', 4), ('AFLR', '')) |
||
3485 | self.assertEqual(double_metaphone('of Louvain', 4), ('AFLF', '')) |
||
3486 | self.assertEqual(double_metaphone('of Mercia', 4), ('AFMR', '')) |
||
3487 | self.assertEqual(double_metaphone('of Metz', 4), ('AFMT', '')) |
||
3488 | self.assertEqual(double_metaphone('of Meulan', 4), ('AFML', '')) |
||
3489 | self.assertEqual(double_metaphone('of Nass', 4), ('AFNS', '')) |
||
3490 | self.assertEqual(double_metaphone('of Normandy', 4), ('AFNR', '')) |
||
3491 | self.assertEqual(double_metaphone('of Ohningen', 4), ('AFNN', '')) |
||
3492 | self.assertEqual(double_metaphone('of Orleans', 4), ('AFRL', '')) |
||
3493 | self.assertEqual(double_metaphone('of Poitou', 4), ('AFPT', '')) |
||
3494 | self.assertEqual(double_metaphone('of Polotzk', 4), ('AFPL', '')) |
||
3495 | self.assertEqual(double_metaphone('of Provence', 4), ('AFPR', '')) |
||
3496 | self.assertEqual(double_metaphone('of Ringelheim', 4), ('AFRN', '')) |
||
3497 | self.assertEqual(double_metaphone('of Salisbury', 4), ('AFSL', '')) |
||
3498 | self.assertEqual(double_metaphone('of Saxony', 4), ('AFSK', '')) |
||
3499 | self.assertEqual(double_metaphone('of Scotland', 4), ('AFSK', '')) |
||
3500 | self.assertEqual(double_metaphone('of Senlis', 4), ('AFSN', '')) |
||
3501 | self.assertEqual(double_metaphone('of Stafford', 4), ('AFST', '')) |
||
3502 | self.assertEqual(double_metaphone('of Swabia', 4), ('AFSP', '')) |
||
3503 | self.assertEqual(double_metaphone('of Tongres', 4), ('AFTN', '')) |
||
3504 | self.assertEqual(double_metaphone('of the Tributes', 4), |
||
3505 | ('AF0T', 'AFTT')) |
||
3506 | self.assertEqual(double_metaphone('unknown', 4), ('ANKN', '')) |
||
3507 | self.assertEqual(double_metaphone('van der Gouda', 4), ('FNTR', '')) |
||
3508 | self.assertEqual(double_metaphone('von Adenbaugh', 4), ('FNTN', '')) |
||
3509 | self.assertEqual(double_metaphone('ARCHITure', 4), ('ARKT', '')) |
||
3510 | self.assertEqual(double_metaphone('Arnoff', 4), ('ARNF', '')) |
||
3511 | self.assertEqual(double_metaphone('Arnow', 4), ('ARN', 'ARNF')) |
||
3512 | self.assertEqual(double_metaphone('DANGER', 4), ('TNJR', 'TNKR')) |
||
3513 | self.assertEqual(double_metaphone('Jankelowicz', 4), ('JNKL', 'ANKL')) |
||
3514 | self.assertEqual(double_metaphone('MANGER', 4), ('MNJR', 'MNKR')) |
||
3515 | self.assertEqual(double_metaphone('McClellan', 4), ('MKLL', '')) |
||
3516 | self.assertEqual(double_metaphone('McHugh', 4), ('MK', '')) |
||
3517 | self.assertEqual(double_metaphone('McLaughlin', 4), ('MKLF', '')) |
||
3518 | self.assertEqual(double_metaphone('ORCHEStra', 4), ('ARKS', '')) |
||
3519 | self.assertEqual(double_metaphone('ORCHID', 4), ('ARKT', '')) |
||
3520 | self.assertEqual(double_metaphone('Pierce', 4), ('PRS', '')) |
||
3521 | self.assertEqual(double_metaphone('RANGER', 4), ('RNJR', 'RNKR')) |
||
3522 | self.assertEqual(double_metaphone('Schlesinger', 4), ('XLSN', 'SLSN')) |
||
3523 | self.assertEqual(double_metaphone('Uomo', 4), ('AM', '')) |
||
3524 | self.assertEqual(double_metaphone('Vasserman', 4), ('FSRM', '')) |
||
3525 | self.assertEqual(double_metaphone('Wasserman', 4), ('ASRM', 'FSRM')) |
||
3526 | self.assertEqual(double_metaphone('Womo', 4), ('AM', 'FM')) |
||
3527 | self.assertEqual(double_metaphone('Yankelovich', 4), ('ANKL', '')) |
||
3528 | self.assertEqual(double_metaphone('accede', 4), ('AKST', '')) |
||
3529 | self.assertEqual(double_metaphone('accident', 4), ('AKST', '')) |
||
3530 | self.assertEqual(double_metaphone('adelsheim', 4), ('ATLS', '')) |
||
3531 | self.assertEqual(double_metaphone('aged', 4), ('AJT', 'AKT')) |
||
3532 | self.assertEqual(double_metaphone('ageless', 4), ('AJLS', 'AKLS')) |
||
3533 | self.assertEqual(double_metaphone('agency', 4), ('AJNS', 'AKNS')) |
||
3534 | self.assertEqual(double_metaphone('aghast', 4), ('AKST', '')) |
||
3535 | self.assertEqual(double_metaphone('agio', 4), ('AJ', 'AK')) |
||
3536 | self.assertEqual(double_metaphone('agrimony', 4), ('AKRM', '')) |
||
3537 | self.assertEqual(double_metaphone('album', 4), ('ALPM', '')) |
||
3538 | self.assertEqual(double_metaphone('alcmene', 4), ('ALKM', '')) |
||
3539 | self.assertEqual(double_metaphone('alehouse', 4), ('ALHS', '')) |
||
3540 | self.assertEqual(double_metaphone('antique', 4), ('ANTK', '')) |
||
3541 | self.assertEqual(double_metaphone('artois', 4), ('ART', 'ARTS')) |
||
3542 | self.assertEqual(double_metaphone('automation', 4), ('ATMX', '')) |
||
3543 | self.assertEqual(double_metaphone('bacchus', 4), ('PKS', '')) |
||
3544 | self.assertEqual(double_metaphone('bacci', 4), ('PX', '')) |
||
3545 | self.assertEqual(double_metaphone('bajador', 4), ('PJTR', 'PHTR')) |
||
3546 | self.assertEqual(double_metaphone('bellocchio', 4), ('PLX', '')) |
||
3547 | self.assertEqual(double_metaphone('bertucci', 4), ('PRTX', '')) |
||
3548 | self.assertEqual(double_metaphone('biaggi', 4), ('PJ', 'PK')) |
||
3549 | self.assertEqual(double_metaphone('bough', 4), ('P', '')) |
||
3550 | self.assertEqual(double_metaphone('breaux', 4), ('PR', '')) |
||
3551 | self.assertEqual(double_metaphone('broughton', 4), ('PRTN', '')) |
||
3552 | self.assertEqual(double_metaphone('cabrillo', 4), ('KPRL', 'KPR')) |
||
3553 | self.assertEqual(double_metaphone('caesar', 4), ('SSR', '')) |
||
3554 | self.assertEqual(double_metaphone('cagney', 4), ('KKN', '')) |
||
3555 | self.assertEqual(double_metaphone('campbell', 4), ('KMPL', '')) |
||
3556 | self.assertEqual(double_metaphone('carlisle', 4), ('KRLL', '')) |
||
3557 | self.assertEqual(double_metaphone('carlysle', 4), ('KRLL', '')) |
||
3558 | self.assertEqual(double_metaphone('chemistry', 4), ('KMST', '')) |
||
3559 | self.assertEqual(double_metaphone('chianti', 4), ('KNT', '')) |
||
3560 | self.assertEqual(double_metaphone('chorus', 4), ('KRS', '')) |
||
3561 | self.assertEqual(double_metaphone('cough', 4), ('KF', '')) |
||
3562 | self.assertEqual(double_metaphone('czerny', 4), ('SRN', 'XRN')) |
||
3563 | self.assertEqual(double_metaphone('deffenbacher', 4), ('TFNP', '')) |
||
3564 | self.assertEqual(double_metaphone('dumb', 4), ('TM', '')) |
||
3565 | self.assertEqual(double_metaphone('edgar', 4), ('ATKR', '')) |
||
3566 | self.assertEqual(double_metaphone('edge', 4), ('AJ', '')) |
||
3567 | self.assertEqual(double_metaphone('filipowicz', 4), ('FLPT', 'FLPF')) |
||
3568 | self.assertEqual(double_metaphone('focaccia', 4), ('FKX', '')) |
||
3569 | self.assertEqual(double_metaphone('gallegos', 4), ('KLKS', 'KKS')) |
||
3570 | self.assertEqual(double_metaphone('gambrelli', 4), ('KMPR', '')) |
||
3571 | self.assertEqual(double_metaphone('geithain', 4), ('K0N', 'JTN')) |
||
3572 | self.assertEqual(double_metaphone('ghiradelli', 4), ('JRTL', '')) |
||
3573 | self.assertEqual(double_metaphone('ghislane', 4), ('JLN', '')) |
||
3574 | self.assertEqual(double_metaphone('gough', 4), ('KF', '')) |
||
3575 | self.assertEqual(double_metaphone('hartheim', 4), ('HR0M', 'HRTM')) |
||
3576 | self.assertEqual(double_metaphone('heimsheim', 4), ('HMSM', '')) |
||
3577 | self.assertEqual(double_metaphone('hochmeier', 4), ('HKMR', '')) |
||
3578 | self.assertEqual(double_metaphone('hugh', 4), ('H', '')) |
||
3579 | self.assertEqual(double_metaphone('hunger', 4), ('HNKR', 'HNJR')) |
||
3580 | self.assertEqual(double_metaphone('hungry', 4), ('HNKR', '')) |
||
3581 | self.assertEqual(double_metaphone('island', 4), ('ALNT', '')) |
||
3582 | self.assertEqual(double_metaphone('isle', 4), ('AL', '')) |
||
3583 | self.assertEqual(double_metaphone('jose', 4), ('HS', '')) |
||
3584 | self.assertEqual(double_metaphone('laugh', 4), ('LF', '')) |
||
3585 | self.assertEqual(double_metaphone('mac caffrey', 4), ('MKFR', '')) |
||
3586 | self.assertEqual(double_metaphone('mac gregor', 4), ('MKRK', '')) |
||
3587 | self.assertEqual(double_metaphone('pegnitz', 4), ('PNTS', 'PKNT')) |
||
3588 | self.assertEqual(double_metaphone('piskowitz', 4), ('PSKT', 'PSKF')) |
||
3589 | self.assertEqual(double_metaphone('queen', 4), ('KN', '')) |
||
3590 | self.assertEqual(double_metaphone('raspberry', 4), ('RSPR', '')) |
||
3591 | self.assertEqual(double_metaphone('resnais', 4), ('RSN', 'RSNS')) |
||
3592 | self.assertEqual(double_metaphone('rogier', 4), ('RJ', 'RJR')) |
||
3593 | self.assertEqual(double_metaphone('rough', 4), ('RF', '')) |
||
3594 | self.assertEqual(double_metaphone('san jacinto', 4), ('SNHS', '')) |
||
3595 | self.assertEqual(double_metaphone('schenker', 4), ('XNKR', 'SKNK')) |
||
3596 | self.assertEqual(double_metaphone('schermerhorn', 4), ('XRMR', 'SKRM')) |
||
3597 | self.assertEqual(double_metaphone('schmidt', 4), ('XMT', 'SMT')) |
||
3598 | self.assertEqual(double_metaphone('schneider', 4), ('XNTR', 'SNTR')) |
||
3599 | self.assertEqual(double_metaphone('school', 4), ('SKL', '')) |
||
3600 | self.assertEqual(double_metaphone('schooner', 4), ('SKNR', '')) |
||
3601 | self.assertEqual(double_metaphone('schrozberg', 4), ('XRSP', 'SRSP')) |
||
3602 | self.assertEqual(double_metaphone('schulman', 4), ('XLMN', '')) |
||
3603 | self.assertEqual(double_metaphone('schwabach', 4), ('XPK', 'XFPK')) |
||
3604 | self.assertEqual(double_metaphone('schwarzach', 4), ('XRSK', 'XFRT')) |
||
3605 | self.assertEqual(double_metaphone('smith', 4), ('SM0', 'XMT')) |
||
3606 | self.assertEqual(double_metaphone('snider', 4), ('SNTR', 'XNTR')) |
||
3607 | self.assertEqual(double_metaphone('succeed', 4), ('SKST', '')) |
||
3608 | self.assertEqual(double_metaphone('sugarcane', 4), ('XKRK', 'SKRK')) |
||
3609 | self.assertEqual(double_metaphone('svobodka', 4), ('SFPT', '')) |
||
3610 | self.assertEqual(double_metaphone('tagliaro', 4), ('TKLR', 'TLR')) |
||
3611 | self.assertEqual(double_metaphone('thames', 4), ('TMS', '')) |
||
3612 | self.assertEqual(double_metaphone('theilheim', 4), ('0LM', 'TLM')) |
||
3613 | self.assertEqual(double_metaphone('thomas', 4), ('TMS', '')) |
||
3614 | self.assertEqual(double_metaphone('thumb', 4), ('0M', 'TM')) |
||
3615 | self.assertEqual(double_metaphone('tichner', 4), ('TXNR', 'TKNR')) |
||
3616 | self.assertEqual(double_metaphone('tough', 4), ('TF', '')) |
||
3617 | self.assertEqual(double_metaphone('umbrella', 4), ('AMPR', '')) |
||
3618 | self.assertEqual(double_metaphone('vilshofen', 4), ('FLXF', '')) |
||
3619 | self.assertEqual(double_metaphone('von schuller', 4), ('FNXL', '')) |
||
3620 | self.assertEqual(double_metaphone('wachtler', 4), ('AKTL', 'FKTL')) |
||
3621 | self.assertEqual(double_metaphone('wechsler', 4), ('AKSL', 'FKSL')) |
||
3622 | self.assertEqual(double_metaphone('weikersheim', 4), ('AKRS', 'FKRS')) |
||
3623 | self.assertEqual(double_metaphone('zhao', 4), ('J', '')) |
||
3624 | |||
3625 | |||
3626 | class CaverphoneTestCases(unittest.TestCase): |
||
3627 | """Test Caverphone functions. |
||
3628 | |||
3629 | test cases for abydos.phonetic.caverphone |
||
3630 | """ |
||
3631 | |||
3632 | def test_caverphone2(self): |
||
3633 | """Test abydos.phonetic.caverphone (Caverphone 2).""" |
||
3634 | self.assertEqual(caverphone(''), '1111111111') |
||
3635 | self.assertEqual(caverphone('', 2), '1111111111') |
||
3636 | self.assertEqual(caverphone('', version=2), '1111111111') |
||
3637 | |||
3638 | # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html |
||
3639 | self.assertEqual(caverphone('Henrichsen'), 'ANRKSN1111') |
||
3640 | self.assertEqual(caverphone('Henricsson'), 'ANRKSN1111') |
||
3641 | self.assertEqual(caverphone('Henriksson'), 'ANRKSN1111') |
||
3642 | self.assertEqual(caverphone('Hinrichsen'), 'ANRKSN1111') |
||
3643 | self.assertEqual(caverphone('Izchaki'), 'ASKKA11111') |
||
3644 | self.assertEqual(caverphone('Maclaverty'), 'MKLFTA1111') |
||
3645 | self.assertEqual(caverphone('Mccleverty'), 'MKLFTA1111') |
||
3646 | self.assertEqual(caverphone('Mcclifferty'), 'MKLFTA1111') |
||
3647 | self.assertEqual(caverphone('Mclafferty'), 'MKLFTA1111') |
||
3648 | self.assertEqual(caverphone('Mclaverty'), 'MKLFTA1111') |
||
3649 | self.assertEqual(caverphone('Slocomb'), 'SLKM111111') |
||
3650 | self.assertEqual(caverphone('Slocombe'), 'SLKM111111') |
||
3651 | self.assertEqual(caverphone('Slocumb'), 'SLKM111111') |
||
3652 | self.assertEqual(caverphone('Whitlam'), 'WTLM111111') |
||
3653 | |||
3654 | # http://caversham.otago.ac.nz/files/working/ctp150804.pdf |
||
3655 | self.assertEqual(caverphone('Stevenson'), 'STFNSN1111') |
||
3656 | self.assertEqual(caverphone('Peter'), 'PTA1111111') |
||
3657 | for word in ('Darda', 'Datha', 'Dedie', 'Deedee', 'Deerdre', 'Deidre', |
||
3658 | 'Deirdre', 'Detta', 'Didi', 'Didier', 'Dido', 'Dierdre', |
||
3659 | 'Dieter', 'Dita', 'Ditter', 'Dodi', 'Dodie', 'Dody', |
||
3660 | 'Doherty', 'Dorthea', 'Dorthy', 'Doti', 'Dotti', 'Dottie', |
||
3661 | 'Dotty', 'Doty', 'Doughty', 'Douty', 'Dowdell', 'Duthie', |
||
3662 | 'Tada', 'Taddeo', 'Tadeo', 'Tadio', 'Tati', 'Teador', |
||
3663 | 'Tedda', 'Tedder', 'Teddi', 'Teddie', 'Teddy', 'Tedi', |
||
3664 | 'Tedie', 'Teeter', 'Teodoor', 'Teodor', 'Terti', 'Theda', |
||
3665 | 'Theodor', 'Theodore', 'Theta', 'Thilda', 'Thordia', |
||
3666 | 'Tilda', 'Tildi', 'Tildie', 'Tildy', 'Tita', 'Tito', |
||
3667 | 'Tjader', 'Toddie', 'Toddy', 'Torto', 'Tuddor', 'Tudor', |
||
3668 | 'Turtle', 'Tuttle', 'Tutto'): |
||
3669 | self.assertEqual(caverphone(word), 'TTA1111111') |
||
3670 | self.assertEqual(caverphone(word, 2), 'TTA1111111') |
||
3671 | self.assertEqual(caverphone(word, version=2), 'TTA1111111') |
||
3672 | for word in ('Cailean', 'Calan', 'Calen', 'Callahan', 'Callan', |
||
3673 | 'Callean', 'Carleen', 'Carlen', 'Carlene', 'Carlin', |
||
3674 | 'Carline', 'Carlyn', 'Carlynn', 'Carlynne', 'Charlean', |
||
3675 | 'Charleen', 'Charlene', 'Charline', 'Cherlyn', 'Chirlin', |
||
3676 | 'Clein', 'Cleon', 'Cline', 'Cohleen', 'Colan', 'Coleen', |
||
3677 | 'Colene', 'Colin', 'Colleen', 'Collen', 'Collin', |
||
3678 | 'Colline', 'Colon', 'Cullan', 'Cullen', 'Cullin', |
||
3679 | 'Gaelan', 'Galan', 'Galen', 'Garlan', 'Garlen', 'Gaulin', |
||
3680 | 'Gayleen', 'Gaylene', 'Giliane', 'Gillan', 'Gillian', |
||
3681 | 'Glen', 'Glenn', 'Glyn', 'Glynn', 'Gollin', 'Gorlin', |
||
3682 | 'Kalin', 'Karlan', 'Karleen', 'Karlen', 'Karlene', |
||
3683 | 'Karlin', 'Karlyn', 'Kaylyn', 'Keelin', 'Kellen', |
||
3684 | 'Kellene', 'Kellyann', 'Kellyn', 'Khalin', 'Kilan', |
||
3685 | 'Kilian', 'Killen', 'Killian', 'Killion', 'Klein', |
||
3686 | 'Kleon', 'Kline', 'Koerlin', 'Kylen', 'Kylynn', 'Quillan', |
||
3687 | 'Quillon', 'Qulllon', 'Xylon'): |
||
3688 | self.assertEqual(caverphone(word), 'KLN1111111') |
||
3689 | self.assertEqual(caverphone(word, 2), 'KLN1111111') |
||
3690 | self.assertEqual(caverphone(word, version=2), 'KLN1111111') |
||
3691 | for word in ('Dan', 'Dane', 'Dann', 'Darn', 'Daune', 'Dawn', 'Ddene', |
||
3692 | 'Dean', 'Deane', 'Deanne', 'DeeAnn', 'Deeann', 'Deeanne', |
||
3693 | 'Deeyn', 'Den', 'Dene', 'Denn', 'Deonne', 'Diahann', |
||
3694 | 'Dian', 'Diane', 'Diann', 'Dianne', 'Diannne', 'Dine', |
||
3695 | 'Dion', 'Dione', 'Dionne', 'Doane', 'Doehne', 'Don', |
||
3696 | 'Donn', 'Doone', 'Dorn', 'Down', 'Downe', 'Duane', 'Dun', |
||
3697 | 'Dunn', 'Duyne', 'Dyan', 'Dyane', 'Dyann', 'Dyanne', |
||
3698 | 'Dyun', 'Tan', 'Tann', 'Teahan', 'Ten', 'Tenn', 'Terhune', |
||
3699 | 'Thain', 'Thaine', 'Thane', 'Thanh', 'Thayne', 'Theone', |
||
3700 | 'Thin', 'Thorn', 'Thorne', 'Thun', 'Thynne', 'Tien', |
||
3701 | 'Tine', 'Tjon', 'Town', 'Towne', 'Turne', 'Tyne'): |
||
3702 | self.assertEqual(caverphone(word), 'TN11111111') |
||
3703 | self.assertEqual(caverphone(word, 2), 'TN11111111') |
||
3704 | self.assertEqual(caverphone(word, version=2), 'TN11111111') |
||
3705 | |||
3706 | # etc. (for code coverage) |
||
3707 | self.assertEqual(caverphone('enough'), 'ANF1111111') |
||
3708 | self.assertEqual(caverphone('trough'), 'TRF1111111') |
||
3709 | self.assertEqual(caverphone('gnu'), 'NA11111111') |
||
3710 | |||
3711 | def test_caverphone2_php_testset(self): |
||
3712 | """Test abydos.phonetic.caverphone (PHP version testset).""" |
||
3713 | # https://raw.githubusercontent.com/kiphughes/caverphone/master/unit_tests.php |
||
3714 | with open(TESTDIR + '/corpora/php_caverphone.csv') as php_testset: |
||
3715 | for php_line in php_testset: |
||
3716 | (word, caver) = php_line.strip().split(',') |
||
3717 | self.assertEqual(caverphone(word), caver) |
||
3718 | |||
3719 | def test_caverphone1(self): |
||
3720 | """Test abydos.phonetic.caverphone (Caverphone 1).""" |
||
3721 | self.assertEqual(caverphone('', 1), '111111') |
||
3722 | self.assertEqual(caverphone('', version=1), '111111') |
||
3723 | |||
3724 | # http://caversham.otago.ac.nz/files/working/ctp060902.pdf |
||
3725 | self.assertEqual(caverphone('David', version=1), 'TFT111') |
||
3726 | self.assertEqual(caverphone('Whittle', version=1), 'WTL111') |
||
3727 | |||
3728 | def test_caversham(self): |
||
3729 | """Test using Caversham test set (SoundEx, Metaphone, & Caverphone).""" |
||
3730 | with open(TESTDIR + '/corpora/variantNames.csv') as cav_testset: |
||
3731 | next(cav_testset) |
||
3732 | for cav_line in cav_testset: |
||
3733 | (name1, soundex1, metaphone1, caverphone1, |
||
3734 | name2, soundex2, metaphone2, caverphone2, |
||
3735 | soundex_same, metaphone_same, caverphone_same) = \ |
||
3736 | cav_line.strip().split(',') |
||
3737 | |||
3738 | self.assertEqual(soundex(name1), soundex1) |
||
3739 | self.assertEqual(soundex(name2), soundex2) |
||
3740 | if soundex_same == '1': |
||
3741 | self.assertEqual(soundex(name1), soundex(name2)) |
||
3742 | else: |
||
3743 | self.assertNotEqual(soundex(name1), soundex(name2)) |
||
3744 | |||
3745 | self.assertEqual(metaphone(name1), metaphone1) |
||
3746 | self.assertEqual(metaphone(name2), metaphone2) |
||
3747 | if metaphone_same == '1': |
||
3748 | self.assertEqual(metaphone(name1), metaphone(name2)) |
||
3749 | else: |
||
3750 | self.assertNotEqual(metaphone(name1), metaphone(name2)) |
||
3751 | |||
3752 | self.assertEqual(caverphone(name1), caverphone1) |
||
3753 | self.assertEqual(caverphone(name2), caverphone2) |
||
3754 | if caverphone_same == '1': |
||
3755 | self.assertEqual(caverphone(name1), caverphone(name2)) |
||
3756 | else: |
||
3757 | self.assertNotEqual(caverphone(name1), caverphone(name2)) |
||
3758 | |||
3759 | |||
3760 | class AlphaSisTestCases(unittest.TestCase): |
||
3761 | """Test Alpha-SIS functions. |
||
3762 | |||
3763 | test cases for abydos.phonetic.alpha_sis |
||
3764 | """ |
||
3765 | |||
3766 | def test_alpha_sis(self): |
||
3767 | """Test abydos.phonetic.alpha_sis.""" |
||
3768 | self.assertEqual(alpha_sis('')[0], '00000000000000') |
||
3769 | |||
3770 | self.assertEqual(alpha_sis('Rodgers')[0], '04740000000000') |
||
3771 | self.assertEqual(alpha_sis('Rogers')[0], '04740000000000') |
||
3772 | self.assertEqual(alpha_sis('Kant')[0], '07210000000000') |
||
3773 | self.assertEqual(alpha_sis('Knuth')[0], '02100000000000') |
||
3774 | self.assertEqual(alpha_sis('Harper')[0], '24940000000000') |
||
3775 | self.assertEqual(alpha_sis('Collier')[0], '07540000000000') |
||
3776 | self.assertEqual(alpha_sis('Schultz')[0], '06500000000000') |
||
3777 | self.assertEqual(alpha_sis('Livingston')[0], '05827012000000') |
||
3778 | |||
3779 | # tests of repeated letters |
||
3780 | self.assertEqual(alpha_sis('Colllier')[0], '07554000000000') |
||
3781 | self.assertEqual(alpha_sis('Collllier')[0], '07554000000000') |
||
3782 | self.assertEqual(alpha_sis('Colllllier')[0], '07555400000000') |
||
3783 | self.assertEqual(alpha_sis('Collllllier')[0], '07555400000000') |
||
3784 | self.assertEqual(alpha_sis('Colalalier')[0], '07555400000000') |
||
3785 | |||
3786 | # maxlength bounds tests |
||
3787 | self.assertEqual(alpha_sis('Niall', maxlength=float('inf'))[0], |
||
3788 | '02500000000000000000000000000000000000000000000000' + |
||
3789 | '00000000000000') |
||
3790 | self.assertEqual(alpha_sis('Niall', maxlength=None)[0], |
||
3791 | '02500000000000000000000000000000000000000000000000' + |
||
3792 | '00000000000000') |
||
3793 | self.assertEqual(alpha_sis('Niall', maxlength=0)[0], '0250') |
||
3794 | |||
3795 | |||
3796 | class FuzzySoundexTestCases(unittest.TestCase): |
||
3797 | """Test Fuzzy Soundex functions. |
||
3798 | |||
3799 | test cases for abydos.phonetic.fuzzy_soundex |
||
3800 | """ |
||
3801 | |||
3802 | def test_fuzzy_soundex(self): |
||
3803 | """Test abydos.phonetic.fuzzy_soundex.""" |
||
3804 | self.assertEqual(fuzzy_soundex(''), '00000') |
||
3805 | # http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf |
||
3806 | self.assertEqual(fuzzy_soundex('Kristen'), 'K6935') |
||
3807 | self.assertEqual(fuzzy_soundex('Krissy'), 'K6900') |
||
3808 | self.assertEqual(fuzzy_soundex('Christen'), 'K6935') |
||
3809 | |||
3810 | # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false |
||
3811 | self.assertEqual(fuzzy_soundex('peter', 4), 'P360') |
||
3812 | self.assertEqual(fuzzy_soundex('pete', 4), 'P300') |
||
3813 | self.assertEqual(fuzzy_soundex('pedro', 4), 'P360') |
||
3814 | self.assertEqual(fuzzy_soundex('stephen', 4), 'S315') |
||
3815 | self.assertEqual(fuzzy_soundex('steve', 4), 'S310') |
||
3816 | self.assertEqual(fuzzy_soundex('smith', 4), 'S530') |
||
3817 | self.assertEqual(fuzzy_soundex('smythe', 4), 'S530') |
||
3818 | self.assertEqual(fuzzy_soundex('gail', 4), 'G400') |
||
3819 | self.assertEqual(fuzzy_soundex('gayle', 4), 'G400') |
||
3820 | self.assertEqual(fuzzy_soundex('christine', 4), 'K693') |
||
3821 | self.assertEqual(fuzzy_soundex('christina', 4), 'K693') |
||
3822 | self.assertEqual(fuzzy_soundex('kristina', 4), 'K693') |
||
3823 | |||
3824 | # etc. (for code coverage) |
||
3825 | self.assertEqual(fuzzy_soundex('Wight'), 'W3000') |
||
3826 | self.assertEqual(fuzzy_soundex('Hardt'), 'H6000') |
||
3827 | self.assertEqual(fuzzy_soundex('Knight'), 'N3000') |
||
3828 | self.assertEqual(fuzzy_soundex('Czech'), 'S7000') |
||
3829 | self.assertEqual(fuzzy_soundex('Tsech'), 'S7000') |
||
3830 | self.assertEqual(fuzzy_soundex('gnomic'), 'N5900') |
||
3831 | self.assertEqual(fuzzy_soundex('Wright'), 'R3000') |
||
3832 | self.assertEqual(fuzzy_soundex('Hrothgar'), 'R3760') |
||
3833 | self.assertEqual(fuzzy_soundex('Hwaet'), 'W3000') |
||
3834 | self.assertEqual(fuzzy_soundex('Grant'), 'G6300') |
||
3835 | self.assertEqual(fuzzy_soundex('Hart'), 'H6000') |
||
3836 | self.assertEqual(fuzzy_soundex('Hardt'), 'H6000') |
||
3837 | |||
3838 | # maxlength bounds tests |
||
3839 | self.assertEqual(fuzzy_soundex('Niall', maxlength=float('inf')), |
||
3840 | 'N4000000000000000000000000000000000000000000000000' + |
||
3841 | '00000000000000') |
||
3842 | self.assertEqual(fuzzy_soundex('Niall', maxlength=None), |
||
3843 | 'N4000000000000000000000000000000000000000000000000' + |
||
3844 | '00000000000000') |
||
3845 | self.assertEqual(fuzzy_soundex('Niall', maxlength=0), 'N400') |
||
3846 | |||
3847 | # zero_pad tests |
||
3848 | self.assertEqual(fuzzy_soundex('Niall', maxlength=float('inf'), |
||
3849 | zero_pad=False), 'N4') |
||
3850 | self.assertEqual(fuzzy_soundex('Niall', maxlength=None, |
||
3851 | zero_pad=False), 'N4') |
||
3852 | self.assertEqual(fuzzy_soundex('Niall', maxlength=0, |
||
3853 | zero_pad=False), 'N4') |
||
3854 | self.assertEqual(fuzzy_soundex('Niall', maxlength=0, |
||
3855 | zero_pad=True), 'N400') |
||
3856 | self.assertEqual(fuzzy_soundex('', maxlength=4, zero_pad=False), '0') |
||
3857 | self.assertEqual(fuzzy_soundex('', maxlength=4, zero_pad=True), '0000') |
||
3858 | |||
3859 | |||
3860 | class PhonexTestCases(unittest.TestCase): |
||
3861 | """Test Phonex functions. |
||
3862 | |||
3863 | test cases for abydos.phonetic.phonex |
||
3864 | """ |
||
3865 | |||
3866 | def test_phonex(self): |
||
3867 | """Test abydos.phonetic.phonex.""" |
||
3868 | self.assertEqual(phonex(''), '0000') |
||
3869 | |||
3870 | # http://homepages.cs.ncl.ac.uk/brian.randell/Genealogy/NameMatching.pdf |
||
3871 | self.assertEqual(phonex('Ewell'), 'A400') |
||
3872 | self.assertEqual(phonex('Filp'), 'F100') |
||
3873 | self.assertEqual(phonex('Heames'), 'A500') |
||
3874 | self.assertEqual(phonex('Kneves'), 'N100') |
||
3875 | self.assertEqual(phonex('River'), 'R160') |
||
3876 | self.assertEqual(phonex('Corley'), 'C400') |
||
3877 | self.assertEqual(phonex('Carton'), 'C350') |
||
3878 | self.assertEqual(phonex('Cachpole'), 'C214') |
||
3879 | |||
3880 | self.assertEqual(phonex('Ewell'), phonex('Ule')) |
||
3881 | self.assertEqual(phonex('Filp'), phonex('Philp')) |
||
3882 | self.assertEqual(phonex('Yule'), phonex('Ewell')) |
||
3883 | self.assertEqual(phonex('Heames'), phonex('Eames')) |
||
3884 | self.assertEqual(phonex('Kneves'), phonex('Neves')) |
||
3885 | self.assertEqual(phonex('River'), phonex('Rivers')) |
||
3886 | self.assertEqual(phonex('Corley'), phonex('Coley')) |
||
3887 | self.assertEqual(phonex('Carton'), phonex('Carlton')) |
||
3888 | self.assertEqual(phonex('Cachpole'), phonex('Catchpole')) |
||
3889 | |||
3890 | # etc. (for code coverage) |
||
3891 | self.assertEqual(phonex('Saxon'), 'S250') |
||
3892 | self.assertEqual(phonex('Wright'), 'R230') |
||
3893 | self.assertEqual(phonex('Ai'), 'A000') |
||
3894 | self.assertEqual(phonex('Barth'), 'B300') |
||
3895 | self.assertEqual(phonex('Perry'), 'B600') |
||
3896 | self.assertEqual(phonex('Garth'), 'G300') |
||
3897 | self.assertEqual(phonex('Jerry'), 'G600') |
||
3898 | self.assertEqual(phonex('Gerry'), 'G600') |
||
3899 | self.assertEqual(phonex('Camden'), 'C500') |
||
3900 | self.assertEqual(phonex('Ganges'), 'G500') |
||
3901 | self.assertEqual(phonex('A-1'), 'A000') |
||
3902 | |||
3903 | # maxlength bounds tests |
||
3904 | self.assertEqual(phonex('Niall', maxlength=float('inf')), |
||
3905 | 'N4000000000000000000000000000000000000000000000000' + |
||
3906 | '00000000000000') |
||
3907 | self.assertEqual(phonex('Niall', maxlength=None), |
||
3908 | 'N4000000000000000000000000000000000000000000000000' + |
||
3909 | '00000000000000') |
||
3910 | self.assertEqual(phonex('Niall', maxlength=0), 'N400') |
||
3911 | |||
3912 | # zero_pad tests |
||
3913 | self.assertEqual(phonex('Niall', maxlength=float('inf'), |
||
3914 | zero_pad=False), 'N4') |
||
3915 | self.assertEqual(phonex('Niall', maxlength=None, zero_pad=False), |
||
3916 | 'N4') |
||
3917 | self.assertEqual(phonex('Niall', maxlength=0, zero_pad=False), |
||
3918 | 'N4') |
||
3919 | self.assertEqual(phonex('Niall', maxlength=0, zero_pad=True), |
||
3920 | 'N400') |
||
3921 | self.assertEqual(phonex('', maxlength=4, zero_pad=False), '0') |
||
3922 | self.assertEqual(phonex('', maxlength=4, zero_pad=True), '0000') |
||
3923 | |||
3924 | |||
3925 | class PhonemTestCases(unittest.TestCase): |
||
3926 | """Test Phonem functions. |
||
3927 | |||
3928 | test cases for abydos.phonetic.phonem |
||
3929 | """ |
||
3930 | |||
3931 | def test_phonem(self): |
||
3932 | """Test abydos.phonetic.phonem.""" |
||
3933 | self.assertEqual(phonem(''), '') |
||
3934 | |||
3935 | # http://phonetik.phil-fak.uni-koeln.de/fileadmin/home/ritters/Allgemeine_Dateien/Martin_Wilz.pdf |
||
3936 | self.assertEqual(phonem('müller'), 'MYLR') |
||
3937 | self.assertEqual(phonem('schmidt'), 'CMYD') |
||
3938 | self.assertEqual(phonem('schneider'), 'CNAYDR') |
||
3939 | self.assertEqual(phonem('fischer'), 'VYCR') |
||
3940 | self.assertEqual(phonem('weber'), 'VBR') |
||
3941 | self.assertEqual(phonem('meyer'), 'MAYR') |
||
3942 | self.assertEqual(phonem('wagner'), 'VACNR') |
||
3943 | self.assertEqual(phonem('schulz'), 'CULC') |
||
3944 | self.assertEqual(phonem('becker'), 'BCR') |
||
3945 | self.assertEqual(phonem('hoffmann'), 'OVMAN') |
||
3946 | self.assertEqual(phonem('schäfer'), 'CVR') |
||
3947 | |||
3948 | # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/008_phonem.t |
||
3949 | self.assertEqual(phonem('mair'), 'MAYR') |
||
3950 | self.assertEqual(phonem('bäker'), 'BCR') |
||
3951 | self.assertEqual(phonem('schaeffer'), 'CVR') |
||
3952 | self.assertEqual(phonem('computer'), 'COMBUDR') |
||
3953 | self.assertEqual(phonem('pfeifer'), 'VAYVR') |
||
3954 | self.assertEqual(phonem('pfeiffer'), 'VAYVR') |
||
3955 | |||
3956 | |||
3957 | class PhonixTestCases(unittest.TestCase): |
||
3958 | """Test Phonix functions. |
||
3959 | |||
3960 | test cases for abydos.phonetic.phonix |
||
3961 | """ |
||
3962 | |||
3963 | def test_phonix(self): |
||
3964 | """Test abydos.phonetic.phonix.""" |
||
3965 | self.assertEqual(phonix(''), '0000') |
||
3966 | |||
3967 | # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t |
||
3968 | self.assertEqual(phonix('Müller'), 'M400') |
||
3969 | self.assertEqual(phonix('schneider'), 'S530') |
||
3970 | self.assertEqual(phonix('fischer'), 'F800') |
||
3971 | self.assertEqual(phonix('weber'), 'W100') |
||
3972 | self.assertEqual(phonix('meyer'), 'M000') |
||
3973 | self.assertEqual(phonix('wagner'), 'W250') |
||
3974 | self.assertEqual(phonix('schulz'), 'S480') |
||
3975 | self.assertEqual(phonix('becker'), 'B200') |
||
3976 | self.assertEqual(phonix('hoffmann'), 'H755') |
||
3977 | self.assertEqual(phonix('schäfer'), 'S700') |
||
3978 | self.assertEqual(phonix('schmidt'), 'S530') |
||
3979 | |||
3980 | # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t: |
||
3981 | # testcases from Wais Module |
||
3982 | self.assertEqual(phonix('computer'), 'K513') |
||
3983 | self.assertEqual(phonix('computers'), 'K513') |
||
3984 | self.assertEqual(phonix('computers', 5), 'K5138') |
||
3985 | self.assertEqual(phonix('pfeifer'), 'F700') |
||
3986 | self.assertEqual(phonix('pfeiffer'), 'F700') |
||
3987 | self.assertEqual(phonix('knight'), 'N300') |
||
3988 | self.assertEqual(phonix('night'), 'N300') |
||
3989 | |||
3990 | # http://cpansearch.perl.org/src/MAROS/Text-Phonetic-2.05/t/007_phonix.t: |
||
3991 | # testcases from |
||
3992 | # http://www.cl.uni-heidelberg.de/~bormann/documents/phono/ |
||
3993 | # They use a sliglty different algorithm (first char is not included in |
||
3994 | # num code here) |
||
3995 | self.assertEqual(phonix('wait'), 'W300') |
||
3996 | self.assertEqual(phonix('weight'), 'W300') |
||
3997 | self.assertEqual(phonix('gnome'), 'N500') |
||
3998 | self.assertEqual(phonix('noam'), 'N500') |
||
3999 | self.assertEqual(phonix('rees'), 'R800') |
||
4000 | self.assertEqual(phonix('reece'), 'R800') |
||
4001 | self.assertEqual(phonix('yaeger'), 'v200') |
||
4002 | |||
4003 | # http://books.google.com/books?id=xtWPI7Is9wIC&lpg=PA29&ots=DXhaL7ZkvK&dq=phonix%20gadd&pg=PA29#v=onepage&q=phonix%20gadd&f=false |
||
4004 | self.assertEqual(phonix('alam'), 'v450') |
||
4005 | self.assertEqual(phonix('berkpakaian'), 'B212') |
||
4006 | self.assertEqual(phonix('capaian'), 'K150') |
||
4007 | |||
4008 | # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false |
||
4009 | self.assertEqual(phonix('peter'), 'P300') |
||
4010 | self.assertEqual(phonix('pete'), 'P300') |
||
4011 | self.assertEqual(phonix('pedro'), 'P360') |
||
4012 | self.assertEqual(phonix('stephen'), 'S375') |
||
4013 | self.assertEqual(phonix('steve'), 'S370') |
||
4014 | self.assertEqual(phonix('smith'), 'S530') |
||
4015 | self.assertEqual(phonix('smythe'), 'S530') |
||
4016 | self.assertEqual(phonix('gail'), 'G400') |
||
4017 | self.assertEqual(phonix('gayle'), 'G400') |
||
4018 | self.assertEqual(phonix('christine'), 'K683') |
||
4019 | self.assertEqual(phonix('christina'), 'K683') |
||
4020 | self.assertEqual(phonix('kristina'), 'K683') |
||
4021 | |||
4022 | # maxlength bounds tests |
||
4023 | self.assertEqual(phonix('Niall', maxlength=float('inf')), 'N4'+'0'*62) |
||
4024 | self.assertEqual(phonix('Niall', maxlength=None), 'N4'+'0'*62) |
||
4025 | self.assertEqual(phonix('Niall', maxlength=0), 'N400') |
||
4026 | |||
4027 | # zero_pad tests |
||
4028 | self.assertEqual(phonix('Niall', maxlength=float('inf'), |
||
4029 | zero_pad=False), 'N4') |
||
4030 | self.assertEqual(phonix('Niall', maxlength=None, zero_pad=False), |
||
4031 | 'N4') |
||
4032 | self.assertEqual(phonix('Niall', maxlength=0, zero_pad=False), |
||
4033 | 'N4') |
||
4034 | self.assertEqual(phonix('Niall', maxlength=0, zero_pad=True), |
||
4035 | 'N400') |
||
4036 | self.assertEqual(phonix('', maxlength=4, zero_pad=False), '0') |
||
4037 | self.assertEqual(phonix('', maxlength=4, zero_pad=True), '0000') |
||
4038 | |||
4039 | |||
4040 | class SfinxBisTestCases(unittest.TestCase): |
||
4041 | """Test SfinxBis functions. |
||
4042 | |||
4043 | test cases for abydos.phonetic.sfinxbis |
||
4044 | """ |
||
4045 | |||
4046 | def test_sfinxbis(self): |
||
4047 | """Test abydos.phonetic.sfinxbis.""" |
||
4048 | self.assertEqual(sfinxbis(''), ('',)) |
||
4049 | |||
4050 | # http://www.swami.se/download/18.248ad5af12aa81365338000106/TestSfinx.txt |
||
4051 | # cases where the gold standard gave clearly wrong values have been |
||
4052 | # corrected below (marked with '# wrong' |
||
4053 | self.assertEqual(sfinxbis('af Sandeberg'), ('S53162',)) |
||
4054 | self.assertEqual(sfinxbis('av Ekenstam'), ('$25835',)) |
||
4055 | self.assertEqual(sfinxbis('Da Costa'), ('K83',)) |
||
4056 | self.assertEqual(sfinxbis('Das Neves'), ('D8', 'N78')) |
||
4057 | self.assertEqual(sfinxbis('de Besche'), ('B8',)) |
||
4058 | self.assertEqual(sfinxbis('de la Motte'), ('M3',)) |
||
4059 | self.assertEqual(sfinxbis('de Las Heras'), ('H68',)) # wrong |
||
4060 | self.assertEqual(sfinxbis('de Los Santos'), ('S538',)) |
||
4061 | self.assertEqual(sfinxbis('del Rosario'), ('R862',)) |
||
4062 | self.assertEqual(sfinxbis('Den Boer'), ('B6',)) |
||
4063 | self.assertEqual(sfinxbis('Der de Kazinczy'), |
||
4064 | ('D6', 'K8528',)) # wrong |
||
4065 | self.assertEqual(sfinxbis('des Rieux'), ('R28',)) |
||
4066 | self.assertEqual(sfinxbis('Di Luca'), ('L2',)) |
||
4067 | self.assertEqual(sfinxbis('Do Rosario'), ('R862',)) |
||
4068 | self.assertEqual(sfinxbis('Don Lind'), ('L53',)) |
||
4069 | self.assertEqual(sfinxbis('Dos Santos'), ('S538',)) |
||
4070 | self.assertEqual(sfinxbis('du Rietz'), ('R38',)) |
||
4071 | self.assertEqual(sfinxbis('in de Betou'), ('B3',)) |
||
4072 | self.assertEqual(sfinxbis('La Fleur'), ('F46',)) |
||
4073 | self.assertEqual(sfinxbis('Le Grand'), ('G653',)) |
||
4074 | self.assertEqual(sfinxbis('li Puma'), ('L', 'P5')) |
||
4075 | self.assertEqual(sfinxbis('lo Martire'), ('L', 'M636')) |
||
4076 | self.assertEqual(sfinxbis('mac Donald'), ('D543',)) |
||
4077 | self.assertEqual(sfinxbis('mc Intosh'), ('$538',)) |
||
4078 | self.assertEqual(sfinxbis('S:t Cyr'), ('S6',)) |
||
4079 | self.assertEqual(sfinxbis('Van Doom'), ('D5',)) |
||
4080 | self.assertEqual(sfinxbis('Van de Peppel'), ('P14',)) |
||
4081 | self.assertEqual(sfinxbis('Van den Berg'), ('B62',)) |
||
4082 | self.assertEqual(sfinxbis('Van Der Kwast'), ('K783',)) |
||
4083 | self.assertEqual(sfinxbis('von Ahn'), ('$5',)) |
||
4084 | self.assertEqual(sfinxbis('von Dem Knesebeck'), ('K5812',)) |
||
4085 | self.assertEqual(sfinxbis('von Der Burg'), ('B62',)) |
||
4086 | self.assertEqual(sfinxbis('D\'Angelo'), ('D524',)) |
||
4087 | self.assertEqual(sfinxbis('O\'Conner'), ('$256',)) |
||
4088 | self.assertEqual(sfinxbis('Los'), ('L8',)) |
||
4089 | self.assertEqual(sfinxbis('Mac'), ('M2',)) |
||
4090 | self.assertEqual(sfinxbis('Till'), ('T4',)) |
||
4091 | self.assertEqual(sfinxbis('Van'), ('V5',)) |
||
4092 | self.assertEqual(sfinxbis('Von'), ('V5',)) |
||
4093 | self.assertEqual(sfinxbis('Bernadotte af Wisborg'), ('B6533', 'V8162')) |
||
4094 | self.assertEqual(sfinxbis('Hjort af Ornäs'), ('J63', '$658')) |
||
4095 | self.assertEqual(sfinxbis('Horn af Åminne'), ('H65', '$55')) |
||
4096 | self.assertEqual(sfinxbis('Horn av Åminne'), ('H65', '$55')) |
||
4097 | self.assertEqual(sfinxbis('Hård af Segerstad'), ('H63', 'S26833')) |
||
4098 | self.assertEqual(sfinxbis('Hård av Segerstad'), ('H63', 'S26833')) |
||
4099 | self.assertEqual(sfinxbis('Stael von Holstein'), ('S34', 'H48325')) |
||
4100 | self.assertEqual(sfinxbis('de Oliveira e Silva'), ('$4726', 'S47')) |
||
4101 | self.assertEqual(sfinxbis('de Alfaro y Gómez'), ('$476', 'G58')) |
||
4102 | self.assertEqual(sfinxbis('Arjaliès-de la Lande'), ('$6248', 'L53')) |
||
4103 | self.assertEqual(sfinxbis('Dominicus van den Bussche'), |
||
4104 | ('D5528', 'B8')) |
||
4105 | self.assertEqual(sfinxbis('Edebol Eeg-Olofsson'), |
||
4106 | ('$314', '$2', '$4785')) |
||
4107 | self.assertEqual(sfinxbis('Jonsson-Blomqvist'), ('J585', 'B452783')) |
||
4108 | self.assertEqual(sfinxbis('Kiviniemi Birgersson'), ('#755', 'B62685')) |
||
4109 | self.assertEqual(sfinxbis('Massena Serpa dos Santos'), |
||
4110 | ('M85', 'S61', 'S538')) |
||
4111 | self.assertEqual(sfinxbis('S:t Clair Renard'), ('K426', 'R563')) |
||
4112 | self.assertEqual(sfinxbis('Skoog H Andersson'), ('S22', 'H', '$53685')) |
||
4113 | self.assertEqual(sfinxbis('von Post-Skagegård'), ('P83', 'S22263')) |
||
4114 | self.assertEqual(sfinxbis('von Zur-Mühlen'), ('S6', 'M45')) |
||
4115 | self.assertEqual(sfinxbis('Waltå O:son'), ('V43', '$85')) |
||
4116 | self.assertEqual(sfinxbis('Zardán Gómez de la Torre'), |
||
4117 | ('S635', 'G58', 'T6')) |
||
4118 | self.assertEqual(sfinxbis('af Jochnick'), ('J252',)) |
||
4119 | self.assertEqual(sfinxbis('af Ioscnick'), ('J8252',)) |
||
4120 | self.assertEqual(sfinxbis('Aabakken'), ('$125',)) |
||
4121 | self.assertEqual(sfinxbis('Åbacken'), ('$125',)) |
||
4122 | self.assertEqual(sfinxbis('Ahlen'), ('$45',)) |
||
4123 | self.assertEqual(sfinxbis('Aleen'), ('$45',)) |
||
4124 | self.assertEqual(sfinxbis('Braunerhielm'), ('B656245',)) |
||
4125 | self.assertEqual(sfinxbis('Branneerhielm'), ('B656245',)) |
||
4126 | self.assertEqual(sfinxbis('Carlzon'), ('K6485',)) |
||
4127 | self.assertEqual(sfinxbis('Karlsson'), ('K6485',)) |
||
4128 | self.assertEqual(sfinxbis('Enochsson'), ('$5285',)) |
||
4129 | self.assertEqual(sfinxbis('Ericsson'), ('$6285',)) |
||
4130 | self.assertEqual(sfinxbis('Ericksson'), ('$6285',)) |
||
4131 | self.assertEqual(sfinxbis('Erixson'), ('$6285',)) |
||
4132 | self.assertEqual(sfinxbis('Filipsson'), ('F4185',)) |
||
4133 | self.assertEqual(sfinxbis('Philipson'), ('F4185',)) |
||
4134 | self.assertEqual(sfinxbis('Flycht'), ('F423',)) |
||
4135 | self.assertEqual(sfinxbis('Flygt'), ('F423',)) |
||
4136 | self.assertEqual(sfinxbis('Flykt'), ('F423',)) |
||
4137 | self.assertEqual(sfinxbis('Fröijer'), ('F626',)) |
||
4138 | self.assertEqual(sfinxbis('Fröjer'), ('F626',)) |
||
4139 | self.assertEqual(sfinxbis('Gertner'), ('J6356',)) |
||
4140 | self.assertEqual(sfinxbis('Hiertner'), ('J6356',)) |
||
4141 | self.assertEqual(sfinxbis('Hirch'), ('H62',)) |
||
4142 | self.assertEqual(sfinxbis('Hirsch'), ('H68',)) |
||
4143 | self.assertEqual(sfinxbis('Haegermarck'), ('H26562',)) |
||
4144 | self.assertEqual(sfinxbis('Hägermark'), ('H26562',)) |
||
4145 | self.assertEqual(sfinxbis('Isaxon'), ('$8285',)) |
||
4146 | self.assertEqual(sfinxbis('Isacsson'), ('$8285',)) |
||
4147 | self.assertEqual(sfinxbis('Joachimsson'), ('J2585',)) |
||
4148 | self.assertEqual(sfinxbis('Joakimson'), ('J2585',)) |
||
4149 | self.assertEqual(sfinxbis('Kjell'), ('#4',)) |
||
4150 | self.assertEqual(sfinxbis('Käll'), ('#4',)) |
||
4151 | self.assertEqual(sfinxbis('Knapp'), ('K51',)) |
||
4152 | self.assertEqual(sfinxbis('Krans'), ('K658',)) |
||
4153 | self.assertEqual(sfinxbis('Krantz'), ('K6538',)) |
||
4154 | self.assertEqual(sfinxbis('Kvist'), ('K783',)) |
||
4155 | self.assertEqual(sfinxbis('Quist'), ('K783',)) |
||
4156 | self.assertEqual(sfinxbis('Lidbeck'), ('L312',)) |
||
4157 | self.assertEqual(sfinxbis('Lidbäck'), ('L312',)) |
||
4158 | self.assertEqual(sfinxbis('Linnér'), ('L56',)) |
||
4159 | self.assertEqual(sfinxbis('Linner'), ('L56',)) |
||
4160 | self.assertEqual(sfinxbis('Lorenzsonn'), ('L6585',)) |
||
4161 | self.assertEqual(sfinxbis('Lorentzon'), ('L65385',)) |
||
4162 | self.assertEqual(sfinxbis('Lorenßon'), ('L6585',)) |
||
4163 | self.assertEqual(sfinxbis('Lyxell'), ('L284',)) |
||
4164 | self.assertEqual(sfinxbis('Lycksell'), ('L284',)) |
||
4165 | self.assertEqual(sfinxbis('Marcström'), ('M628365',)) |
||
4166 | self.assertEqual(sfinxbis('Markström'), ('M628365',)) |
||
4167 | self.assertEqual(sfinxbis('Michaelsson'), ('M2485',)) |
||
4168 | self.assertEqual(sfinxbis('Mikaelson'), ('M2485',)) |
||
4169 | self.assertEqual(sfinxbis('Mörch'), ('M62',)) |
||
4170 | self.assertEqual(sfinxbis('Mörck'), ('M62',)) |
||
4171 | self.assertEqual(sfinxbis('Mörk'), ('M62',)) |
||
4172 | self.assertEqual(sfinxbis('Mørk'), ('M62',)) |
||
4173 | self.assertEqual(sfinxbis('Nääs'), ('N8',)) |
||
4174 | self.assertEqual(sfinxbis('Naess'), ('N8',)) |
||
4175 | self.assertEqual(sfinxbis('Nordstedt'), ('N63833',)) |
||
4176 | self.assertEqual(sfinxbis('Oxenstierna'), ('$28583265',)) |
||
4177 | self.assertEqual(sfinxbis('Palmçrañtz'), ('P4526538',)) |
||
4178 | self.assertEqual(sfinxbis('Palmcrantz'), ('P4526538',)) |
||
4179 | self.assertEqual(sfinxbis('Palmkrantz'), ('P4526538',)) |
||
4180 | self.assertEqual(sfinxbis('Preuss'), ('P68',)) |
||
4181 | self.assertEqual(sfinxbis('Preutz'), ('P638',)) |
||
4182 | self.assertEqual(sfinxbis('Richardson'), ('R26385',)) |
||
4183 | self.assertEqual(sfinxbis('Rikardson'), ('R26385',)) |
||
4184 | self.assertEqual(sfinxbis('Ruuth'), ('R3',)) |
||
4185 | self.assertEqual(sfinxbis('Ruth'), ('R3',)) |
||
4186 | self.assertEqual(sfinxbis('Sæter'), ('S36',)) |
||
4187 | self.assertEqual(sfinxbis('Zäter'), ('S36',)) |
||
4188 | self.assertEqual(sfinxbis('Schedin'), ('#35',)) |
||
4189 | self.assertEqual(sfinxbis('Sjödin'), ('#35',)) |
||
4190 | self.assertEqual(sfinxbis('Siöö'), ('#',)) |
||
4191 | self.assertEqual(sfinxbis('Sjöh'), ('#',)) |
||
4192 | self.assertEqual(sfinxbis('Svedberg'), ('S73162',)) |
||
4193 | self.assertEqual(sfinxbis('Zwedberg'), ('S73162',)) |
||
4194 | self.assertEqual(sfinxbis('Tjäder'), ('#36',)) |
||
4195 | self.assertEqual(sfinxbis('þornquist'), ('T652783',)) |
||
4196 | self.assertEqual(sfinxbis('Thörnqvist'), ('T652783',)) |
||
4197 | self.assertEqual(sfinxbis('Törnkvist'), ('T652783',)) |
||
4198 | self.assertEqual(sfinxbis('Wichman'), ('V255',)) |
||
4199 | self.assertEqual(sfinxbis('Wickman'), ('V255',)) |
||
4200 | self.assertEqual(sfinxbis('Wictorin'), ('V2365',)) |
||
4201 | self.assertEqual(sfinxbis('Wictorsson'), ('V23685',)) |
||
4202 | self.assertEqual(sfinxbis('Viktorson'), ('V23685',)) |
||
4203 | self.assertEqual(sfinxbis('Zachrisson'), ('S2685',)) |
||
4204 | self.assertEqual(sfinxbis('Zakrison'), ('S2685',)) |
||
4205 | self.assertEqual(sfinxbis('Övragård'), ('$76263',)) |
||
4206 | self.assertEqual(sfinxbis('Öfvragårdh'), ('$76263',)) |
||
4207 | self.assertEqual(sfinxbis('Bogdanovic'), ('B23572',)) |
||
4208 | self.assertEqual(sfinxbis('Bogdanovitch'), ('B235732',)) |
||
4209 | self.assertEqual(sfinxbis('Dieterich'), ('D362',)) |
||
4210 | self.assertEqual(sfinxbis('Eichorn'), ('$265',)) |
||
4211 | self.assertEqual(sfinxbis('Friedrich'), ('F6362',)) |
||
4212 | self.assertEqual(sfinxbis('Grantcharova'), ('G653267',)) |
||
4213 | self.assertEqual(sfinxbis('Ilichev'), ('$427',)) |
||
4214 | self.assertEqual(sfinxbis('Ivankovic'), ('$75272',)) |
||
4215 | self.assertEqual(sfinxbis('Ivangurich'), ('$75262',)) |
||
4216 | self.assertEqual(sfinxbis('Kinch'), ('#52',)) |
||
4217 | self.assertEqual(sfinxbis('Kirchmann'), ('#6255',)) |
||
4218 | self.assertEqual(sfinxbis('Machado'), ('M23',)) |
||
4219 | self.assertEqual(sfinxbis('Reich'), ('R2',)) |
||
4220 | self.assertEqual(sfinxbis('Roche'), ('R2',)) |
||
4221 | self.assertEqual(sfinxbis('Rubaszkin'), ('R1825',)) |
||
4222 | self.assertEqual(sfinxbis('Rubaschkin'), ('R1825',)) |
||
4223 | self.assertEqual(sfinxbis('Sanchez'), ('S528',)) |
||
4224 | self.assertEqual(sfinxbis('Walukiewicz'), ('V42728',)) |
||
4225 | self.assertEqual(sfinxbis('Valukievitch'), ('V42732',)) |
||
4226 | self.assertEqual(sfinxbis('K'), ('K',)) |
||
4227 | self.assertEqual(sfinxbis('2010'), ('',)) |
||
4228 | self.assertEqual(sfinxbis('cese'), ('S8',)) |
||
4229 | |||
4230 | # a few maxlength tests |
||
4231 | self.assertEqual(sfinxbis('Kiviniemi Birgersson', 3), ('#75', 'B62')) |
||
4232 | self.assertEqual(sfinxbis('Eichorn', 4), ('$265',)) |
||
4233 | self.assertEqual(sfinxbis('Friedrich', 4), ('F636',)) |
||
4234 | self.assertEqual(sfinxbis('Grantcharova', 4), ('G653',)) |
||
4235 | self.assertEqual(sfinxbis('Ilichev', 4), ('$427',)) |
||
4236 | self.assertEqual(sfinxbis('Ivankovic', 4), ('$752',)) |
||
4237 | self.assertEqual(sfinxbis('Ivangurich', 4), ('$752',)) |
||
4238 | self.assertEqual(sfinxbis('Kinch', 4), ('#52',)) |
||
4239 | self.assertEqual(sfinxbis('Kirchmann', 4), ('#625',)) |
||
4240 | self.assertEqual(sfinxbis('Machado', 4), ('M23',)) |
||
4241 | self.assertEqual(sfinxbis('Reich', 4), ('R2',)) |
||
4242 | self.assertEqual(sfinxbis('Roche', 4), ('R2',)) |
||
4243 | self.assertEqual(sfinxbis('Rubaszkin', 4), ('R182',)) |
||
4244 | self.assertEqual(sfinxbis('Rubaschkin', 4), ('R182',)) |
||
4245 | self.assertEqual(sfinxbis('Sanchez', 4), ('S528',)) |
||
4246 | self.assertEqual(sfinxbis('Walukiewicz', 4), ('V427',)) |
||
4247 | self.assertEqual(sfinxbis('Valukievitch', 4), ('V427',)) |
||
4248 | self.assertEqual(sfinxbis('K', 4), ('K',)) |
||
4249 | self.assertEqual(sfinxbis('2010', 4), ('',)) |
||
4250 | self.assertEqual(sfinxbis('cese', 4), ('S8',)) |
||
4251 | |||
4252 | # etc. (for code coverage) |
||
4253 | self.assertEqual(sfinxbis('chans'), ('#58',)) |
||
4254 | self.assertEqual(sfinxbis('ljud'), ('J3',)) |
||
4255 | self.assertEqual(sfinxbis('qi'), ('K',)) |
||
4256 | self.assertEqual(sfinxbis('xavier'), ('S76',)) |
||
4257 | self.assertEqual(sfinxbis('skjul'), ('#4',)) |
||
4258 | self.assertEqual(sfinxbis('schul'), ('#4',)) |
||
4259 | self.assertEqual(sfinxbis('skil'), ('#4',)) |
||
4260 | |||
4261 | # maxlength bounds tests |
||
4262 | self.assertEqual(sfinxbis('Niall', maxlength=float('inf')), ('N4',)) |
||
4263 | self.assertEqual(sfinxbis('Niall', maxlength=None), ('N4',)) |
||
4264 | self.assertEqual(sfinxbis('Niall', maxlength=0), ('N4',)) |
||
4265 | |||
4266 | |||
4267 | class PhonetTestCases(unittest.TestCase): |
||
4268 | """Test Phonet functions. |
||
4269 | |||
4270 | test cases for abydos.phonetic.phonet |
||
4271 | """ |
||
4272 | |||
4273 | def test_phonet_german(self): |
||
4274 | """Test abydos.phonetic.phonet (German).""" |
||
4275 | self.assertEqual(phonet(''), '') |
||
4276 | |||
4277 | # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java |
||
4278 | self.assertEqual(phonet('', 1), '') |
||
4279 | self.assertEqual(phonet('Zedlitz', 1), 'ZETLIZ') |
||
4280 | self.assertEqual(phonet('Bremerhaven', 1), 'BREMAHAFN') |
||
4281 | self.assertEqual(phonet('Hamburger Hafen', 1), 'HAMBURGA HAFN') |
||
4282 | self.assertEqual(phonet('Jesper', 1), 'IESPA') |
||
4283 | self.assertEqual(phonet('elisabeth', 1), 'ELISABET') |
||
4284 | self.assertEqual(phonet('elisabet', 1), 'ELISABET') |
||
4285 | self.assertEqual(phonet('Ziegler', 1), 'ZIKLA') |
||
4286 | self.assertEqual(phonet('Scherer', 1), 'SHERA') |
||
4287 | self.assertEqual(phonet('Bartels', 1), 'BARTLS') |
||
4288 | self.assertEqual(phonet('Jansen', 1), 'IANSN') |
||
4289 | self.assertEqual(phonet('Sievers', 1), 'SIWAS') |
||
4290 | self.assertEqual(phonet('Michels', 1), 'MICHLS') |
||
4291 | self.assertEqual(phonet('Ewers', 1), 'EWERS') |
||
4292 | self.assertEqual(phonet('Evers', 1), 'EWERS') |
||
4293 | self.assertEqual(phonet('Wessels', 1), 'WESLS') |
||
4294 | self.assertEqual(phonet('Gottschalk', 1), 'GOSHALK') |
||
4295 | self.assertEqual(phonet('Brückmann', 1), 'BRÜKMAN') |
||
4296 | self.assertEqual(phonet('Blechschmidt', 1), 'BLECHSHMIT') |
||
4297 | self.assertEqual(phonet('Kolodziej', 1), 'KOLOTZI') |
||
4298 | self.assertEqual(phonet('Krauße', 1), 'KRAUSE') |
||
4299 | self.assertEqual(phonet('Cachel', 1), 'KESHL') |
||
4300 | |||
4301 | self.assertEqual(phonet('', 2), '') |
||
4302 | self.assertEqual(phonet('Zedlitz', 2), 'ZETLIZ') |
||
4303 | self.assertEqual(phonet('Bremerhaven', 2), 'BRENAFN') |
||
4304 | self.assertEqual(phonet('Schönberg', 2), 'ZÖNBAK') |
||
4305 | self.assertEqual(phonet('Hamburger Hafen', 2), 'ANBURKA AFN') |
||
4306 | self.assertEqual(phonet('Ziegler', 2), 'ZIKLA') |
||
4307 | self.assertEqual(phonet('Scherer', 2), 'ZERA') |
||
4308 | self.assertEqual(phonet('Jansen', 2), 'IANZN') |
||
4309 | self.assertEqual(phonet('Eberhardt', 2), 'EBART') |
||
4310 | self.assertEqual(phonet('Gottschalk', 2), 'KUZALK') |
||
4311 | self.assertEqual(phonet('Brückmann', 2), 'BRIKNAN') |
||
4312 | self.assertEqual(phonet('Blechschmidt', 2), 'BLEKZNIT') |
||
4313 | self.assertEqual(phonet('Kolodziej', 2), 'KULUTZI') |
||
4314 | self.assertEqual(phonet('Krauße', 2), 'KRAUZE') |
||
4315 | |||
4316 | # etc. (for code coverage) |
||
4317 | self.assertEqual(phonet('Jesper', 1), 'IESPA') |
||
4318 | self.assertEqual(phonet('Glacéhandschuh', 1), 'GLAZANSHU') |
||
4319 | self.assertEqual(phonet('Blechschmidt', 1), 'BLECHSHMIT') |
||
4320 | self.assertEqual(phonet('Burgdorf', 1), 'BURKDORF') |
||
4321 | self.assertEqual(phonet('Holzschuh', 1), 'HOLSHU') |
||
4322 | self.assertEqual(phonet('Aachen', 1), 'ACHN') |
||
4323 | self.assertEqual(phonet('Abendspaziergang', 1), 'ABENTSPAZIRGANK') |
||
4324 | |||
4325 | def test_phonet_nolang(self): |
||
4326 | """Test abydos.phonetic.phonet (no language).""" |
||
4327 | self.assertEqual(phonet('', lang='none'), '') |
||
4328 | |||
4329 | # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java |
||
4330 | self.assertEqual(phonet('', 1, 'none'), '') |
||
4331 | self.assertEqual(phonet('Zedlitz', 1, 'none'), 'ZEDLITZ') |
||
4332 | self.assertEqual(phonet('Bremerhaven', 1, 'none'), 'BREMERHAVEN') |
||
4333 | self.assertEqual(phonet('Schönberg', 2, 'none'), 'SCHOENBERG') |
||
4334 | self.assertEqual(phonet('Brückmann', 1, 'none'), 'BRUECKMAN') |
||
4335 | self.assertEqual(phonet('Krauße', 1, 'none'), 'KRAUSE') |
||
4336 | |||
4337 | self.assertEqual(phonet('', 2, 'none'), '') |
||
4338 | self.assertEqual(phonet('Zedlitz', 2, 'none'), 'ZEDLITZ') |
||
4339 | self.assertEqual(phonet('Bremerhaven', 2, 'none'), 'BREMERHAVEN') |
||
4340 | self.assertEqual(phonet('Schönberg', 2, 'none'), 'SCHOENBERG') |
||
4341 | self.assertEqual(phonet('Brückmann', 2, 'none'), 'BRUECKMAN') |
||
4342 | self.assertEqual(phonet('Krauße', 2, 'none'), 'KRAUSE') |
||
4343 | |||
4344 | def test_phonet_nachnamen(self): |
||
4345 | """Test abydos.phonetic.phonet (Nachnamen set).""" |
||
4346 | if not ALLOW_RANDOM: |
||
4347 | return |
||
4348 | with codecs.open(TESTDIR + '/corpora/nachnamen.csv', |
||
4349 | encoding='utf-8') as nachnamen_testset: |
||
4350 | for nn_line in nachnamen_testset: |
||
4351 | if nn_line[0] != '#': |
||
4352 | nn_line = nn_line.strip().split(',') |
||
4353 | # This test set is very large (~10000 entries) |
||
4354 | # so let's just randomly select about 100 for testing |
||
4355 | if len(nn_line) >= 3 and one_in(100): |
||
4356 | (term, ph1, ph2) = nn_line |
||
4357 | self.assertEqual(phonet(term, 1), ph1) |
||
4358 | self.assertEqual(phonet(term, 2), ph2) |
||
4359 | |||
4360 | def test_phonet_ngerman(self): |
||
4361 | """Test abydos.phonetic.phonet (ngerman set).""" |
||
4362 | if not ALLOW_RANDOM: |
||
4363 | return |
||
4364 | with codecs.open(TESTDIR + '/corpora/ngerman.csv', |
||
4365 | encoding='utf-8') as ngerman_testset: |
||
4366 | for ng_line in ngerman_testset: |
||
4367 | if ng_line[0] != '#': |
||
4368 | ng_line = ng_line.strip().split(',') |
||
4369 | # This test set is very large (~3000000 entries) |
||
4370 | # so let's just randomly select about 30 for testing |
||
4371 | if len(ng_line) >= 3 and one_in(10000): |
||
4372 | (term, ph1, ph2) = ng_line |
||
4373 | self.assertEqual(phonet(term, 1), ph1) |
||
4374 | self.assertEqual(phonet(term, 2), ph2) |
||
4375 | |||
4376 | |||
4377 | class SPFCTestCases(unittest.TestCase): |
||
4378 | """Test SPFC functions. |
||
4379 | |||
4380 | test cases for abydos.phonetic.spfc |
||
4381 | """ |
||
4382 | |||
4383 | def test_spfc(self): |
||
4384 | """Test abydos.phonetic.spfc.""" |
||
4385 | self.assertEqual(spfc(''), '') |
||
4386 | |||
4387 | # https://archive.org/stream/accessingindivid00moor#page/19/mode/1up |
||
4388 | self.assertEqual(spfc(('J', 'KUHNS')), '16760') |
||
4389 | self.assertEqual(spfc(('G', 'ALTSHULER')), '35797') |
||
4390 | self.assertEqual(spfc('J KUHNS'), '16760') |
||
4391 | self.assertEqual(spfc('G ALTSHULER'), '35797') |
||
4392 | self.assertEqual(spfc('J. KUHNS'), '16760') |
||
4393 | self.assertEqual(spfc('G. ALTSHULER'), '35797') |
||
4394 | self.assertEqual(spfc('J. Kuhns'), '16760') |
||
4395 | self.assertEqual(spfc('G. Altshuler'), '35797') |
||
4396 | self.assertEqual(spfc('T. Vines'), '16760') |
||
4397 | self.assertEqual(spfc('J. Butler'), '35779') |
||
4398 | self.assertNotEqual(spfc('J. Kuhns'), spfc('J. Kuntz')) |
||
4399 | self.assertEqual(spfc('Jon Kuhns'), '16760') |
||
4400 | self.assertEqual(spfc('James Kuhns'), '16760') |
||
4401 | |||
4402 | self.assertRaises(AttributeError, spfc, ('J', 'A', 'Kuhns')) |
||
4403 | self.assertRaises(AttributeError, spfc, 'JKuhns') |
||
4404 | self.assertRaises(AttributeError, spfc, 5) |
||
4405 | |||
4406 | # etc. (for code coverage) |
||
4407 | self.assertEqual(spfc('James Goldstein'), '78795') |
||
4408 | self.assertEqual(spfc('James Hansen'), '58760') |
||
4409 | self.assertEqual(spfc('James Hester'), '59700') |
||
4410 | self.assertEqual(spfc('James Bardot'), '31745') |
||
4411 | self.assertEqual(spfc('James Windsor'), '29765') |
||
4412 | self.assertEqual(spfc('James Wenders'), '27760') |
||
4413 | self.assertEqual(spfc('James Ventor'), '17760') |
||
4414 | self.assertEqual(spfc('þ þ'), '00') |
||
4415 | |||
4416 | |||
4417 | class StatisticsCanadaTestCases(unittest.TestCase): |
||
4418 | """Test Statistics Canada functions. |
||
4419 | |||
4420 | test cases for abydos.phonetic.statistics_canada |
||
4421 | """ |
||
4422 | |||
4423 | def test_statistics_canada(self): |
||
4424 | """Test abydos.phonetic.statistics_canada.""" |
||
4425 | self.assertEqual(statistics_canada(''), '') |
||
4426 | |||
4427 | # https://naldc.nal.usda.gov/download/27833/PDF |
||
4428 | self.assertEqual(statistics_canada('Daves'), 'DVS') |
||
4429 | self.assertEqual(statistics_canada('Davies'), 'DVS') |
||
4430 | self.assertEqual(statistics_canada('Devese'), 'DVS') |
||
4431 | self.assertEqual(statistics_canada('Devies'), 'DVS') |
||
4432 | self.assertEqual(statistics_canada('Devos'), 'DVS') |
||
4433 | |||
4434 | self.assertEqual(statistics_canada('Smathers'), 'SMTH') |
||
4435 | self.assertEqual(statistics_canada('Smithart'), 'SMTH') |
||
4436 | self.assertEqual(statistics_canada('Smithbower'), 'SMTH') |
||
4437 | self.assertEqual(statistics_canada('Smitherman'), 'SMTH') |
||
4438 | self.assertEqual(statistics_canada('Smithey'), 'SMTH') |
||
4439 | self.assertEqual(statistics_canada('Smithgall'), 'SMTH') |
||
4440 | self.assertEqual(statistics_canada('Smithingall'), 'SMTH') |
||
4441 | self.assertEqual(statistics_canada('Smithmyer'), 'SMTH') |
||
4442 | self.assertEqual(statistics_canada('Smithpeter'), 'SMTH') |
||
4443 | self.assertEqual(statistics_canada('Smithson'), 'SMTH') |
||
4444 | self.assertEqual(statistics_canada('Smithy'), 'SMTH') |
||
4445 | self.assertEqual(statistics_canada('Smotherman'), 'SMTH') |
||
4446 | self.assertEqual(statistics_canada('Smothers'), 'SMTH') |
||
4447 | self.assertEqual(statistics_canada('Smyth'), 'SMTH') |
||
4448 | |||
4449 | # Additional tests from @Yomguithereal's talisman |
||
4450 | # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/statcan.js |
||
4451 | self.assertEqual(statistics_canada('Guillaume'), 'GLM') |
||
4452 | self.assertEqual(statistics_canada('Arlène'), 'ARLN') |
||
4453 | self.assertEqual(statistics_canada('Lüdenscheidt'), 'LDNS') |
||
4454 | |||
4455 | |||
4456 | class LeinTestCases(unittest.TestCase): |
||
4457 | """Test Lein functions. |
||
4458 | |||
4459 | test cases for abydos.phonetic.lein |
||
4460 | """ |
||
4461 | |||
4462 | def test_lein(self): |
||
4463 | """Test abydos.phonetic.lein.""" |
||
4464 | self.assertEqual(lein(''), '') |
||
4465 | |||
4466 | # https://naldc.nal.usda.gov/download/27833/PDF |
||
4467 | self.assertEqual(lein('Dubose'), 'D450') |
||
4468 | self.assertEqual(lein('Dubs'), 'D450') |
||
4469 | self.assertEqual(lein('Dubbs'), 'D450') |
||
4470 | self.assertEqual(lein('Doviak'), 'D450') |
||
4471 | self.assertEqual(lein('Dubke'), 'D450') |
||
4472 | self.assertEqual(lein('Dubus'), 'D450') |
||
4473 | self.assertEqual(lein('Dubois'), 'D450') |
||
4474 | self.assertEqual(lein('Duboise'), 'D450') |
||
4475 | self.assertEqual(lein('Doubek'), 'D450') |
||
4476 | self.assertEqual(lein('Defigh'), 'D450') |
||
4477 | self.assertEqual(lein('Defazio'), 'D450') |
||
4478 | self.assertEqual(lein('Debaca'), 'D450') |
||
4479 | self.assertEqual(lein('Dabbs'), 'D450') |
||
4480 | self.assertEqual(lein('Davies'), 'D450') |
||
4481 | self.assertEqual(lein('Dubukey'), 'D450') |
||
4482 | self.assertEqual(lein('Debus'), 'D450') |
||
4483 | self.assertEqual(lein('Debose'), 'D450') |
||
4484 | self.assertEqual(lein('Daves'), 'D450') |
||
4485 | self.assertEqual(lein('Dipiazza'), 'D450') |
||
4486 | self.assertEqual(lein('Dobbs'), 'D450') |
||
4487 | self.assertEqual(lein('Dobak'), 'D450') |
||
4488 | self.assertEqual(lein('Dobis'), 'D450') |
||
4489 | self.assertEqual(lein('Dobish'), 'D450') |
||
4490 | self.assertEqual(lein('Doepke'), 'D450') |
||
4491 | self.assertEqual(lein('Divish'), 'D450') |
||
4492 | self.assertEqual(lein('Dobosh'), 'D450') |
||
4493 | self.assertEqual(lein('Dupois'), 'D450') |
||
4494 | self.assertEqual(lein('Dufek'), 'D450') |
||
4495 | self.assertEqual(lein('Duffek'), 'D450') |
||
4496 | self.assertEqual(lein('Dupuis'), 'D450') |
||
4497 | self.assertEqual(lein('Dupas'), 'D450') |
||
4498 | self.assertEqual(lein('Devese'), 'D450') |
||
4499 | self.assertEqual(lein('Devos'), 'D450') |
||
4500 | self.assertEqual(lein('Deveaux'), 'D450') |
||
4501 | self.assertEqual(lein('Devies'), 'D450') |
||
4502 | |||
4503 | self.assertEqual(lein('Sand'), 'S210') |
||
4504 | self.assertEqual(lein('Sandau'), 'S210') |
||
4505 | self.assertEqual(lein('Sande'), 'S210') |
||
4506 | self.assertEqual(lein('Sandia'), 'S210') |
||
4507 | self.assertEqual(lein('Sando'), 'S210') |
||
4508 | self.assertEqual(lein('Sandoe'), 'S210') |
||
4509 | self.assertEqual(lein('Sandy'), 'S210') |
||
4510 | self.assertEqual(lein('Santee'), 'S210') |
||
4511 | self.assertEqual(lein('Santi'), 'S210') |
||
4512 | self.assertEqual(lein('Santo'), 'S210') |
||
4513 | self.assertEqual(lein('Send'), 'S210') |
||
4514 | self.assertEqual(lein('Sennet'), 'S210') |
||
4515 | self.assertEqual(lein('Shemoit'), 'S210') |
||
4516 | self.assertEqual(lein('Shenot'), 'S210') |
||
4517 | self.assertEqual(lein('Shumate'), 'S210') |
||
4518 | self.assertEqual(lein('Simmet'), 'S210') |
||
4519 | self.assertEqual(lein('Simot'), 'S210') |
||
4520 | self.assertEqual(lein('Sineath'), 'S210') |
||
4521 | self.assertEqual(lein('Sinnott'), 'S210') |
||
4522 | self.assertEqual(lein('Sintay'), 'S210') |
||
4523 | self.assertEqual(lein('Smead'), 'S210') |
||
4524 | self.assertEqual(lein('Smeda'), 'S210') |
||
4525 | self.assertEqual(lein('Smit'), 'S210') |
||
4526 | |||
4527 | # Additional tests from @Yomguithereal's talisman |
||
4528 | # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/lein.js |
||
4529 | self.assertEqual(lein('Guillaume'), 'G320') |
||
4530 | self.assertEqual(lein('Arlène'), 'A332') |
||
4531 | self.assertEqual(lein('Lüdenscheidt'), 'L125') |
||
4532 | |||
4533 | # Coverage |
||
4534 | self.assertEqual(lein('Lüdenscheidt', zero_pad=False), 'L125') |
||
4535 | self.assertEqual(lein('Smith', zero_pad=False), 'S21') |
||
4536 | |||
4537 | |||
4538 | class RogerRootTestCases(unittest.TestCase): |
||
4539 | """Test Roger Root functions. |
||
4540 | |||
4541 | test cases for abydos.phonetic.roger_root |
||
4542 | """ |
||
4543 | |||
4544 | def test_roger_root(self): |
||
4545 | """Test abydos.phonetic.roger_root.""" |
||
4546 | self.assertEqual(roger_root(''), '00000') |
||
4547 | |||
4548 | # https://naldc.nal.usda.gov/download/27833/PDF |
||
4549 | self.assertEqual(roger_root('BROWNER'), '09424') |
||
4550 | self.assertEqual(roger_root('STANLEY'), '00125') |
||
4551 | self.assertEqual(roger_root('CHALMAN'), '06532') |
||
4552 | self.assertEqual(roger_root('CHING'), '06270') |
||
4553 | self.assertEqual(roger_root('ANDERSON'), '12140') |
||
4554 | self.assertEqual(roger_root('OVERSTREET'), '18401') |
||
4555 | self.assertEqual(roger_root('HECKEL'), '27500') |
||
4556 | self.assertEqual(roger_root('WYSZYNSKI'), '40207') |
||
4557 | self.assertEqual(roger_root('WHITTED'), '41100') |
||
4558 | self.assertEqual(roger_root('ONGOQO'), '12770') # PDF had a typo? |
||
4559 | self.assertEqual(roger_root('JOHNSON'), '32020') |
||
4560 | self.assertEqual(roger_root('WILLIAMS'), '45300') |
||
4561 | self.assertEqual(roger_root('SMITH'), '00310') |
||
4562 | self.assertEqual(roger_root('JONES'), '32000') |
||
4563 | self.assertEqual(roger_root('BROWN'), '09420') |
||
4564 | self.assertEqual(roger_root('DAVIS'), '01800') |
||
4565 | self.assertEqual(roger_root('JACKSON'), '37020') |
||
4566 | self.assertEqual(roger_root('WILSON'), '45020') |
||
4567 | self.assertEqual(roger_root('LEE'), '05000') |
||
4568 | self.assertEqual(roger_root('THOMAS'), '01300') |
||
4569 | |||
4570 | self.assertEqual(roger_root('Defouw'), '01800') |
||
4571 | self.assertEqual(roger_root('Dauphi'), '01800') |
||
4572 | self.assertEqual(roger_root('Defazio'), '01800') |
||
4573 | self.assertEqual(roger_root('Defay'), '01800') |
||
4574 | self.assertEqual(roger_root('Davy'), '01800') |
||
4575 | self.assertEqual(roger_root('Defee'), '01800') |
||
4576 | self.assertEqual(roger_root('Dayhoff'), '01800') |
||
4577 | self.assertEqual(roger_root('Davie'), '01800') |
||
4578 | self.assertEqual(roger_root('Davey'), '01800') |
||
4579 | self.assertEqual(roger_root('Davies'), '01800') |
||
4580 | self.assertEqual(roger_root('Daves'), '01800') |
||
4581 | self.assertEqual(roger_root('Deife'), '01800') |
||
4582 | self.assertEqual(roger_root('Dehoff'), '01800') |
||
4583 | self.assertEqual(roger_root('Devese'), '01800') |
||
4584 | self.assertEqual(roger_root('Devoe'), '01800') |
||
4585 | self.assertEqual(roger_root('Devee'), '01800') |
||
4586 | self.assertEqual(roger_root('Devies'), '01800') |
||
4587 | self.assertEqual(roger_root('Devos'), '01800') |
||
4588 | self.assertEqual(roger_root('Dafoe'), '01800') |
||
4589 | self.assertEqual(roger_root('Dove'), '01800') |
||
4590 | self.assertEqual(roger_root('Duff'), '01800') |
||
4591 | self.assertEqual(roger_root('Duffey'), '01800') |
||
4592 | self.assertEqual(roger_root('Duffie'), '01800') |
||
4593 | self.assertEqual(roger_root('Duffy'), '01800') |
||
4594 | self.assertEqual(roger_root('Duyava'), '01800') |
||
4595 | self.assertEqual(roger_root('Tafoya'), '01800') |
||
4596 | self.assertEqual(roger_root('Tevis'), '01800') |
||
4597 | self.assertEqual(roger_root('Tiffee'), '01800') |
||
4598 | self.assertEqual(roger_root('Tivis'), '01800') |
||
4599 | self.assertEqual(roger_root('Thevis'), '01800') |
||
4600 | self.assertEqual(roger_root('Tovey'), '01800') |
||
4601 | self.assertEqual(roger_root('Toeves'), '01800') |
||
4602 | self.assertEqual(roger_root('Tuffs'), '01800') |
||
4603 | |||
4604 | self.assertEqual(roger_root('Samotid'), '00311') |
||
4605 | self.assertEqual(roger_root('Simmet'), '00310') |
||
4606 | self.assertEqual(roger_root('Simot'), '00310') |
||
4607 | self.assertEqual(roger_root('Smead'), '00310') |
||
4608 | self.assertEqual(roger_root('Smeda'), '00310') |
||
4609 | self.assertEqual(roger_root('Smit'), '00310') |
||
4610 | self.assertEqual(roger_root('Smite'), '00310') |
||
4611 | self.assertEqual(roger_root('Smithe'), '00310') |
||
4612 | self.assertEqual(roger_root('Smithey'), '00310') |
||
4613 | self.assertEqual(roger_root('Smithson'), '00310') |
||
4614 | self.assertEqual(roger_root('Smithy'), '00310') |
||
4615 | self.assertEqual(roger_root('Smoot'), '00310') |
||
4616 | self.assertEqual(roger_root('Smyth'), '00310') |
||
4617 | self.assertEqual(roger_root('Szmodis'), '00310') |
||
4618 | self.assertEqual(roger_root('Zemaitis'), '00310') |
||
4619 | self.assertEqual(roger_root('Zmuda'), '00310') |
||
4620 | |||
4621 | # Additional tests from @Yomguithereal's talisman |
||
4622 | # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/roger-root.js |
||
4623 | self.assertEqual(roger_root('Guillaume'), '07530') |
||
4624 | self.assertEqual(roger_root('Arlène'), '14520') |
||
4625 | self.assertEqual(roger_root('Lüdenscheidt'), '05126') |
||
4626 | |||
4627 | # no zero_pad |
||
4628 | self.assertEqual(roger_root('BROWNER', zero_pad=False), '09424') |
||
4629 | self.assertEqual(roger_root('STANLEY', zero_pad=False), '00125') |
||
4630 | self.assertEqual(roger_root('CHALMAN', zero_pad=False), '06532') |
||
4631 | self.assertEqual(roger_root('CHING', zero_pad=False), '0627') |
||
4632 | self.assertEqual(roger_root('ANDERSON', zero_pad=False), '12140') |
||
4633 | self.assertEqual(roger_root('OVERSTREET, zero_pad=False'), '18401') |
||
4634 | self.assertEqual(roger_root('HECKEL', zero_pad=False), '275') |
||
4635 | self.assertEqual(roger_root('WYSZYNSKI', zero_pad=False), '40207') |
||
4636 | self.assertEqual(roger_root('WHITTED', zero_pad=False), '411') |
||
4637 | self.assertEqual(roger_root('ONGOQO', zero_pad=False), '1277') |
||
4638 | self.assertEqual(roger_root('JOHNSON', zero_pad=False), '3202') |
||
4639 | self.assertEqual(roger_root('WILLIAMS', zero_pad=False), '4530') |
||
4640 | self.assertEqual(roger_root('SMITH', zero_pad=False), '0031') |
||
4641 | self.assertEqual(roger_root('JONES', zero_pad=False), '320') |
||
4642 | self.assertEqual(roger_root('BROWN', zero_pad=False), '0942') |
||
4643 | self.assertEqual(roger_root('DAVIS', zero_pad=False), '0180') |
||
4644 | self.assertEqual(roger_root('JACKSON', zero_pad=False), '3702') |
||
4645 | self.assertEqual(roger_root('WILSON', zero_pad=False), '4502') |
||
4646 | self.assertEqual(roger_root('LEE', zero_pad=False), '05') |
||
4647 | self.assertEqual(roger_root('THOMAS', zero_pad=False), '0130') |
||
4648 | |||
4649 | |||
4650 | class ONCATestCases(unittest.TestCase): |
||
4651 | """Test ONCA functions. |
||
4652 | |||
4653 | test cases for abydos.phonetic.onca |
||
4654 | """ |
||
4655 | |||
4656 | def test_onca(self): |
||
4657 | """Test abydos.phonetic.onca.""" |
||
4658 | # https://nces.ed.gov/FCSM/pdf/RLT97.pdf |
||
4659 | self.assertEqual(onca('HALL'), 'H400') |
||
4660 | self.assertEqual(onca('SMITH'), 'S530') |
||
4661 | |||
4662 | # http://nchod.uhce.ox.ac.uk/NCHOD%20Oxford%20E5%20Report%201st%20Feb_VerAM2.pdf |
||
4663 | self.assertEqual(onca('HAWTON'), 'H350') |
||
4664 | self.assertEqual(onca('HORTON'), 'H635') |
||
4665 | self.assertEqual(onca('HOUGHTON'), 'H235') |
||
4666 | |||
4667 | |||
4668 | class EudexTestCases(unittest.TestCase): |
||
4669 | """Test eudex functions. |
||
4670 | |||
4671 | test cases for abydos.phonetic.eudex |
||
4672 | """ |
||
4673 | |||
4674 | def test_eudex(self): |
||
4675 | """Test abydos.phonetic.eudex.""" |
||
4676 | # exact & mismatch cases from |
||
4677 | # https://github.com/ticki/eudex/blob/master/src/tests.rs |
||
4678 | self.assertEqual(eudex('JAva'), eudex('jAva')) |
||
4679 | self.assertEqual(eudex('co!mputer'), eudex('computer')) |
||
4680 | self.assertEqual(eudex('comp-uter'), eudex('computer')) |
||
4681 | self.assertEqual(eudex('comp@u#te?r'), eudex('computer')) |
||
4682 | self.assertEqual(eudex('lal'), eudex('lel')) |
||
4683 | self.assertEqual(eudex('rindom'), eudex('ryndom')) |
||
4684 | self.assertEqual(eudex('riiiindom'), eudex('ryyyyyndom')) |
||
4685 | self.assertEqual(eudex('riyiyiiindom'), eudex('ryyyyyndom')) |
||
4686 | self.assertEqual(eudex('triggered'), eudex('TRIGGERED')) |
||
4687 | self.assertEqual(eudex('repert'), eudex('ropert')) |
||
4688 | |||
4689 | self.assertNotEqual(eudex('reddit'), eudex('eddit')) |
||
4690 | self.assertNotEqual(eudex('lol'), eudex('lulz')) |
||
4691 | self.assertNotEqual(eudex('ijava'), eudex('java')) |
||
4692 | self.assertNotEqual(eudex('jiva'), eudex('java')) |
||
4693 | self.assertNotEqual(eudex('jesus'), eudex('iesus')) |
||
4694 | self.assertNotEqual(eudex('aesus'), eudex('iesus')) |
||
4695 | self.assertNotEqual(eudex('iesus'), eudex('yesus')) |
||
4696 | self.assertNotEqual(eudex('rupirt'), eudex('ropert')) |
||
4697 | self.assertNotEqual(eudex('ripert'), eudex('ropyrt')) |
||
4698 | self.assertNotEqual(eudex('rrr'), eudex('rraaaa')) |
||
4699 | self.assertNotEqual(eudex('randomal'), eudex('randomai')) |
||
4700 | |||
4701 | # manually checked against algorithm |
||
4702 | self.assertEqual(eudex('guillaume'), 288230383131034112) |
||
4703 | self.assertEqual(eudex('niall'), 648518346341351840) |
||
4704 | self.assertEqual(eudex('hello'), 144115188075896832) |
||
4705 | self.assertEqual(eudex('christopher'), 433648490138894409) |
||
4706 | self.assertEqual(eudex('colin'), 432345564238053650) |
||
4707 | |||
4708 | |||
4709 | class HaasePhonetikTestCases(unittest.TestCase): |
||
4710 | """Test Haase Phonetik functions. |
||
4711 | |||
4712 | test cases for abydos.phonetic.haase_phonetik |
||
4713 | """ |
||
4714 | |||
4715 | def test_haase_phonetik(self): |
||
4716 | """Test abydos.phonetic.haase_phonetik.""" |
||
4717 | # Base cases |
||
4718 | self.assertEqual(haase_phonetik(''), ('',)) |
||
4719 | |||
4720 | # equivalents |
||
4721 | self.assertEqual(haase_phonetik('Häschen'), haase_phonetik('Haeschen')) |
||
4722 | self.assertEqual(haase_phonetik('Schloß'), haase_phonetik('Schloss')) |
||
4723 | self.assertEqual(haase_phonetik('üben'), haase_phonetik('ueben')) |
||
4724 | self.assertEqual(haase_phonetik('Eichörnchen'), |
||
4725 | haase_phonetik('Eichoernchen')) |
||
4726 | |||
4727 | # coverage completion |
||
4728 | self.assertEqual(haase_phonetik('Häschen'), ('9896', '9496')) |
||
4729 | self.assertEqual(haase_phonetik('Häschen', primary_only=True), |
||
4730 | ('9896',)) |
||
4731 | self.assertEqual(haase_phonetik('Eichörnchen'), ('94976496',)) |
||
4732 | self.assertEqual(haase_phonetik('Hexe'), ('9489',)) |
||
4733 | self.assertEqual(haase_phonetik('Chemie'), ('4969', '8969')) |
||
4734 | |||
4735 | self.assertEqual(haase_phonetik('Brille'), ('17959', '179')) |
||
4736 | self.assertEqual(haase_phonetik('Brilleille'), |
||
4737 | ('1795959', '17959', '179')) |
||
4738 | self.assertEqual(haase_phonetik('Niveau'), ('6939',)) |
||
4739 | self.assertEqual(haase_phonetik('Korb'), ('4971', '4973')) |
||
4740 | self.assertEqual(haase_phonetik('Heino'), ('969', '9693')) |
||
4741 | self.assertEqual(haase_phonetik('Nekka'), ('6949', '69497')) |
||
4742 | self.assertEqual(haase_phonetik('Aleph'), ('9593',)) |
||
4743 | self.assertEqual(haase_phonetik('Aleppo'), ('95919', '959193')) |
||
4744 | self.assertEqual(haase_phonetik('Endzipfel'), ('96891395',)) |
||
4745 | self.assertEqual(haase_phonetik('verbrandt'), ('39717962', '39737962')) |
||
4746 | self.assertEqual(haase_phonetik('Cent'), ('8962',)) |
||
4747 | self.assertEqual(haase_phonetik('addiscendae'), ('92989629',)) |
||
4748 | self.assertEqual(haase_phonetik('kickx'), ('4948',)) |
||
4749 | self.assertEqual(haase_phonetik('sanctionen'), ('896829696',)) |
||
4750 | |||
4751 | |||
4752 | class RethSchekTestCases(unittest.TestCase): |
||
4753 | """Test Reth-Schek Phonetik functions. |
||
4754 | |||
4755 | test cases for abydos.phonetic.reth_schek_phonetik |
||
4756 | """ |
||
4757 | |||
4758 | def test_reth_schek_phonetik(self): |
||
4759 | """Test abydos.phonetic.reth_schek_phonetik.""" |
||
4760 | # Base cases |
||
4761 | self.assertEqual(reth_schek_phonetik(''), '') |
||
4762 | |||
4763 | # equivalents |
||
4764 | self.assertEqual(reth_schek_phonetik('Häschen'), |
||
4765 | reth_schek_phonetik('Haeschen')) |
||
4766 | self.assertEqual(reth_schek_phonetik('Schloß'), |
||
4767 | reth_schek_phonetik('Schloss')) |
||
4768 | self.assertEqual(reth_schek_phonetik('üben'), |
||
4769 | reth_schek_phonetik('ueben')) |
||
4770 | self.assertEqual(reth_schek_phonetik('Eichörnchen'), |
||
4771 | reth_schek_phonetik('Eichoernchen')) |
||
4772 | |||
4773 | self.assertEqual(reth_schek_phonetik('Häschen'), 'HESCHEN') |
||
4774 | self.assertEqual(reth_schek_phonetik('Eichörnchen'), 'AIGHOERNGHEN') |
||
4775 | self.assertEqual(reth_schek_phonetik('Hexe'), 'HEXE') |
||
4776 | self.assertEqual(reth_schek_phonetik('Chemie'), 'GHEMI') |
||
4777 | self.assertEqual(reth_schek_phonetik('Brille'), 'BRILE') |
||
4778 | self.assertEqual(reth_schek_phonetik('Brilleille'), 'BRILAILE') |
||
4779 | self.assertEqual(reth_schek_phonetik('Niveau'), 'NIFEAU') |
||
4780 | self.assertEqual(reth_schek_phonetik('Korb'), 'GORB') |
||
4781 | self.assertEqual(reth_schek_phonetik('Heino'), 'HAINO') |
||
4782 | self.assertEqual(reth_schek_phonetik('Nekka'), 'NEKA') |
||
4783 | self.assertEqual(reth_schek_phonetik('Aleph'), 'ALEF') |
||
4784 | self.assertEqual(reth_schek_phonetik('Aleppo'), 'ALEBO') |
||
4785 | self.assertEqual(reth_schek_phonetik('Endzipfel'), 'ENDZIBFL') |
||
4786 | self.assertEqual(reth_schek_phonetik('verbrandt'), 'FERBRAND') |
||
4787 | self.assertEqual(reth_schek_phonetik('Cent'), 'GEND') |
||
4788 | self.assertEqual(reth_schek_phonetik('addiscendae'), 'ADISGENDE') |
||
4789 | self.assertEqual(reth_schek_phonetik('kickx'), 'GIGX') |
||
4790 | self.assertEqual(reth_schek_phonetik('sanctionen'), 'SANGDIONEN') |
||
4791 | self.assertEqual(reth_schek_phonetik('Kuh'), 'GU') |
||
4792 | self.assertEqual(reth_schek_phonetik('lecker'), 'LEGR') |
||
4793 | self.assertEqual(reth_schek_phonetik('rödlich'), 'ROEDLIG') |
||
4794 | |||
4795 | |||
4796 | class FonemTestCases(unittest.TestCase): |
||
4797 | """Test FONEM functions. |
||
4798 | |||
4799 | test cases for abydos.phonetic.fonem |
||
4800 | """ |
||
4801 | |||
4802 | def test_fonem(self): |
||
4803 | """Test abydos.phonetic.fonem.""" |
||
4804 | # Base cases |
||
4805 | self.assertEqual(fonem(''), '') |
||
4806 | |||
4807 | # Test cases, mostly from the FONEM specification, |
||
4808 | # but copied from Talisman: |
||
4809 | # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/french/fonem.js |
||
4810 | test_cases = ( |
||
4811 | ('BEAULAC', 'BOLAK'), |
||
4812 | ('BAULAC', 'BOLAK'), |
||
4813 | ('IMBEAULT', 'INBO'), |
||
4814 | ('DUFAUT', 'DUFO'), |
||
4815 | ('THIBOUTOT', 'TIBOUTOT'), |
||
4816 | ('DEVAUX', 'DEVO'), |
||
4817 | ('RONDEAUX', 'RONDO'), |
||
4818 | ('BOURGAULX', 'BOURGO'), |
||
4819 | ('PINCHAUD', 'PINCHO'), |
||
4820 | ('PEDNAULD', 'PEDNO'), |
||
4821 | ('MAZENOD', 'MASENOD'), |
||
4822 | ('ARNOLD', 'ARNOL'), |
||
4823 | ('BERTOLD', 'BERTOL'), |
||
4824 | ('BELLAY', 'BELE'), |
||
4825 | ('SANDAY', 'SENDE'), |
||
4826 | ('GAY', 'GAI'), |
||
4827 | ('FAYARD', 'FAYAR'), |
||
4828 | ('LEMIEUX', 'LEMIEU'), |
||
4829 | ('LHEUREUX', 'LEUREU'), |
||
4830 | ('BELLEY', 'BELE'), |
||
4831 | ('WELLEY', 'WELE'), |
||
4832 | ('MEYER', 'MEYER'), |
||
4833 | ('BOILY', 'BOILI'), |
||
4834 | ('LOYSEAU', 'LOISO'), |
||
4835 | ('MAYRAND', 'MAIREN'), |
||
4836 | ('GUYON', 'GUYON'), |
||
4837 | ('FAILLARD', 'FAYAR'), |
||
4838 | ('FAIARD', 'FAYAR'), |
||
4839 | ('MEIER', 'MEYER'), |
||
4840 | ('MEILLER', 'MEYER'), |
||
4841 | ('GUILLON', 'GUYON'), |
||
4842 | ('LAVILLE', 'LAVILLE'), |
||
4843 | ('COUET', 'CWET'), |
||
4844 | ('EDOUARD', 'EDWAR'), |
||
4845 | ('GIROUARD', 'JIRWAR'), |
||
4846 | ('OZOUADE', 'OSWADE'), # differs from test set |
||
4847 | ('BOUILLE', 'BOUYE'), |
||
4848 | ('POUYEZ', 'POUYES'), # differs from test set |
||
4849 | ('LEMEE', 'LEME'), |
||
4850 | ('ABRAAM', 'ABRAM'), |
||
4851 | ('ARCHEMBAULT', 'ARCHENBO'), |
||
4852 | ('AMTHIME', 'ENTIME'), |
||
4853 | ('ROMPRE', 'RONPRE'), |
||
4854 | ('BOMSECOURS', 'BONSECOURS'), |
||
4855 | ('BOULANGER', 'BOULENJER'), |
||
4856 | ('TANCREDE', 'TENKREDE'), |
||
4857 | ('BLAIN', 'BLIN'), |
||
4858 | ('BLAINVILLE', 'BLINVILLE'), |
||
4859 | ('MAINARD', 'MAINAR'), |
||
4860 | ('RAIMOND', 'RAIMON'), |
||
4861 | ('BLACKBORN', 'BLAKBURN'), |
||
4862 | ('SEABOURNE', 'SEABURN'), |
||
4863 | ('IMBO', 'INBO'), |
||
4864 | ('RIMFRET', 'RINFRET'), |
||
4865 | ('LEFEBVRE', 'LEFEVRE'), |
||
4866 | ('MACE', 'MASSE'), |
||
4867 | ('MACON', 'MACON'), |
||
4868 | ('MARCELIN', 'MARSELIN'), |
||
4869 | ('MARCEAU', 'MARSO'), |
||
4870 | ('VINCELETTE', 'VINSELETE'), |
||
4871 | ('FORCADE', 'FORCADE'), |
||
4872 | ('CELINE', 'SELINE'), |
||
4873 | ('CERAPHIN', 'SERAFIN'), |
||
4874 | ('CAMILLE', 'KAMILLE'), |
||
4875 | ('CAYETTE', 'KAYETE'), |
||
4876 | ('CARINE', 'KARINE'), |
||
4877 | ('LUC', 'LUK'), |
||
4878 | ('LEBLANC', 'LEBLEN'), |
||
4879 | ('VICTOR', 'VIKTOR'), |
||
4880 | ('LACCOULINE', 'LAKOULINE'), |
||
4881 | ('MACCIMILIEN', 'MAXIMILIEN'), |
||
4882 | ('MAGELLA', 'MAJELA'), |
||
4883 | ('GINETTE', 'JINETE'), |
||
4884 | ('GANDET', 'GANDET'), |
||
4885 | ('GEORGES', 'JORJES'), |
||
4886 | ('GEOFFROID', 'JOFROID'), |
||
4887 | ('PAGEAU', 'PAJO'), |
||
4888 | ('GAGNION', 'GAGNON'), |
||
4889 | ('MIGNIER', 'MIGNER'), |
||
4890 | ('HALLEY', 'ALE'), |
||
4891 | ('GAUTHIER', 'GOTIER'), |
||
4892 | ('CHARTIER', 'CHARTIER'), |
||
4893 | ('JEANNE', 'JANE'), |
||
4894 | ('MACGREGOR', 'MACGREGOR'), |
||
4895 | ('MACKAY', 'MACKE'), |
||
4896 | ('MCNICOL', 'MACNICOL'), |
||
4897 | ('MCNEIL', 'MACNEIL'), |
||
4898 | ('PHANEUF', 'FANEUF'), |
||
4899 | ('PHILIPPE', 'FILIPE'), |
||
4900 | ('QUENNEVILLE', 'KENEVILLE'), |
||
4901 | ('LAROCQUE', 'LAROKE'), |
||
4902 | ('SCIPION', 'SIPION'), |
||
4903 | ('ASCELIN', 'ASSELIN'), |
||
4904 | ('VASCO', 'VASKO'), |
||
4905 | ('PASCALINE', 'PASKALINE'), |
||
4906 | ('ESHEMBACK', 'ECHENBAK'), |
||
4907 | ('ASHED', 'ACHED'), |
||
4908 | ('GRATIA', 'GRASSIA'), |
||
4909 | ('PATRITIA', 'PATRISSIA'), |
||
4910 | ('BERTIO', 'BERTIO'), |
||
4911 | ('MATIEU', 'MATIEU'), |
||
4912 | ('BERTIAUME', 'BERTIOME'), |
||
4913 | ('MUNROW', 'MUNRO'), |
||
4914 | ('BRANISLAW', 'BRANISLA'), |
||
4915 | ('LOWMEN', 'LOMEN'), |
||
4916 | ('ANDREW', 'ENDREW'), |
||
4917 | ('EXCEL', 'EXEL'), |
||
4918 | ('EXCERINE', 'EXERINE'), |
||
4919 | ('EXSILDA', 'EXILDA'), |
||
4920 | ('EXZELDA', 'EXELDA'), |
||
4921 | ('CAZEAU', 'KASO'), |
||
4922 | ('BRAZEAU', 'BRASO'), |
||
4923 | ('FITZPATRICK', 'FITSPATRIK'), |
||
4924 | ('SINGELAIS', 'ST-JELAIS'), |
||
4925 | ('CINQMARS', 'ST-MARS'), |
||
4926 | ('SAINT-AMAND', 'ST-AMEN'), |
||
4927 | ('SAINTECROIX', 'STE-KROIX'), |
||
4928 | ('ST-HILAIRE', 'ST-ILAIRE'), |
||
4929 | ('STE-CROIX', 'STE-KROIX'), |
||
4930 | ('LAVALLEE', 'LAVALE'), |
||
4931 | ('CORINNE', 'KORINE'), |
||
4932 | ('DUTILE', 'DUTILLE') |
||
4933 | ) |
||
4934 | for name, encoding in test_cases: |
||
4935 | self.assertEqual(fonem(name), encoding) |
||
4936 | |||
4937 | |||
4938 | class ParmarKumbharanaTestCases(unittest.TestCase): |
||
4939 | """Test Parmar-Kumbharana functions. |
||
4940 | |||
4941 | test cases for abydos.phonetic.parmar_kumbharana |
||
4942 | """ |
||
4943 | |||
4944 | def test_parmar_kumbharana(self): |
||
4945 | """Test abydos.phonetic.parmar_kumbharana.""" |
||
4946 | # Base cases |
||
4947 | self.assertEqual(fonem(''), '') |
||
4948 | |||
4949 | # Test cases from Parmar & Kumbharana (2014) |
||
4950 | test_cases = ( |
||
4951 | ('Week', 'WK'), |
||
4952 | ('Weak', 'WK'), |
||
4953 | ('Piece', 'PS'), |
||
4954 | ('Peace', 'PS'), |
||
4955 | ('Bed', 'BD'), |
||
4956 | ('Bad', 'BD'), |
||
4957 | ('Would', 'WD'), |
||
4958 | ('Wood', 'WD'), |
||
4959 | ('Sun', 'SN'), |
||
4960 | ('Son', 'SN'), |
||
4961 | ('Ship', 'SP'), |
||
4962 | ('Sheep', 'SP'), |
||
4963 | ('Later', 'LTR'), |
||
4964 | ('Letter', 'LTR'), |
||
4965 | ('Low', 'LW'), |
||
4966 | ('Law', 'LW'), |
||
4967 | ('She', 'S'), |
||
4968 | ('See', 'S'), |
||
4969 | ('Sea', 'S'), |
||
4970 | ('Case', 'CS'), |
||
4971 | ('Cash', 'CS'), |
||
4972 | ('Of', 'OF'), |
||
4973 | ('Off', 'OF'), |
||
4974 | ('Live', 'LV'), |
||
4975 | ('Leave', 'LV'), |
||
4976 | ('Sign', 'SN'), |
||
4977 | ('Sine', 'SN'), |
||
4978 | ('Sin', 'SN'), |
||
4979 | ('Seen', 'SN'), |
||
4980 | ('By', 'B'), |
||
4981 | ('Bye', 'B'), |
||
4982 | ('Reach', 'RCH'), |
||
4983 | ('Rich', 'RCH'), |
||
4984 | ('Sort', 'SRT'), |
||
4985 | ('Short', 'SRT'), |
||
4986 | ('Center', 'SNTR'), |
||
4987 | ('Centre', 'SNTR'), |
||
4988 | ('Full', 'FL'), |
||
4989 | ('Fool', 'FL'), |
||
4990 | ('Then', 'THN'), |
||
4991 | ('Than', 'THN'), |
||
4992 | ('Fill', 'FL'), |
||
4993 | ('Feel', 'FL'), |
||
4994 | ('Two', 'TW'), |
||
4995 | ('To', 'T'), |
||
4996 | ('Too', 'T'), |
||
4997 | ('Four', 'FR'), |
||
4998 | ('For', 'FR'), |
||
4999 | ('Mat', 'MT'), |
||
5000 | ('Met', 'MT'), |
||
5001 | ('Merry', 'MR'), |
||
5002 | ('Marry', 'MR') |
||
5003 | ) |
||
5004 | for word, encoding in test_cases: |
||
5005 | self.assertEqual(parmar_kumbharana(word), encoding) |
||
5006 | |||
5007 | |||
5008 | class DavidsonTestCases(unittest.TestCase): |
||
5009 | """Test class Davidson functions. |
||
5010 | |||
5011 | test cases for abydos.phonetic.davidson |
||
5012 | """ |
||
5013 | |||
5014 | def test_davidson(self): |
||
5015 | """Test abydos.phonetic.davidson.""" |
||
5016 | # Base cases |
||
5017 | self.assertEqual(davidson('', omit_fname=True), ' ') |
||
5018 | self.assertEqual(davidson(''), ' .') |
||
5019 | |||
5020 | # Test cases from Gadd (1988) "'Fisching fore werds': phonetic |
||
5021 | # retrieval of written text in information systems." Program, |
||
5022 | # 22(3). 222--237. |
||
5023 | # doi:10.1108/eb046999 |
||
5024 | test_cases = ( |
||
5025 | ('WAIT', 'WT '), |
||
5026 | ('WEIGHT', 'WGT '), |
||
5027 | ('KNIGHT', 'KNGT'), |
||
5028 | ('NIGHT', 'NGT '), |
||
5029 | ('NITE', 'NT '), |
||
5030 | ('GNOME', 'GNM '), |
||
5031 | ('NOAM', 'NM '), |
||
5032 | ('SMIDT', 'SMDT'), |
||
5033 | ('SMIT', 'SMT '), |
||
5034 | ('SMITH', 'SMT '), |
||
5035 | ('SCHMIT', 'SCMT'), |
||
5036 | ('CRAFT', 'CRFT'), |
||
5037 | ('KRAFT', 'KRFT'), |
||
5038 | ('REES', 'RS '), |
||
5039 | ('REECE', 'RC '), |
||
5040 | ) |
||
5041 | for word, encoding in test_cases: |
||
5042 | self.assertEqual(davidson(word, omit_fname=True), encoding) |
||
5043 | |||
5044 | |||
5045 | class SoundDTestCases(unittest.TestCase): |
||
5046 | """Test class SoundD functions. |
||
5047 | |||
5048 | test cases for abydos.phonetic.sound_d |
||
5049 | """ |
||
5050 | |||
5051 | def test_sound_d(self): |
||
5052 | """Test abydos.phonetic.sound_d.""" |
||
5053 | # Base cases |
||
5054 | self.assertEqual(sound_d(''), '0000') |
||
5055 | self.assertEqual(sound_d('', maxlength=6), '000000') |
||
5056 | |||
5057 | self.assertEqual(sound_d('knight'), '5300') |
||
5058 | self.assertEqual(sound_d('accept'), '2130') |
||
5059 | self.assertEqual(sound_d('pneuma'), '5500') |
||
5060 | self.assertEqual(sound_d('ax'), '2000') |
||
5061 | self.assertEqual(sound_d('wherever'), '6160') |
||
5062 | self.assertEqual(sound_d('pox'), '1200') |
||
5063 | self.assertEqual(sound_d('anywhere'), '5600') |
||
5064 | self.assertEqual(sound_d('adenosine'), '3525') |
||
5065 | self.assertEqual(sound_d('judge'), '2200') |
||
5066 | self.assertEqual(sound_d('rough'), '6000') |
||
5067 | self.assertEqual(sound_d('x-ray'), '2600') |
||
5068 | self.assertEqual(sound_d('acetylcholine', maxlength=None), '234245') |
||
5069 | self.assertEqual(sound_d('rough', maxlength=None), '6') |
||
5070 | |||
5071 | |||
5072 | class PSHPSoundexTestCases(unittest.TestCase): |
||
5073 | """Test PSHP Soundex functions. |
||
5074 | |||
5075 | test cases for abydos.phonetic.pshp_soundex_last & pshp_soundex_first |
||
5076 | """ |
||
5077 | |||
5078 | def test_pshp_soundex_last(self): |
||
5079 | """Test abydos.phonetic.pshp_soundex_last.""" |
||
5080 | # Base case |
||
5081 | self.assertEqual(pshp_soundex_last(''), '0000') |
||
5082 | |||
5083 | self.assertEqual(pshp_soundex_last('JAMES'), 'J500') |
||
5084 | self.assertEqual(pshp_soundex_last('JOHN'), 'J500') |
||
5085 | self.assertEqual(pshp_soundex_last('PAT'), 'P300') |
||
5086 | self.assertEqual(pshp_soundex_last('PETER'), 'P350') |
||
5087 | |||
5088 | self.assertEqual(pshp_soundex_last('Smith'), 'S530') |
||
5089 | self.assertEqual(pshp_soundex_last('van Damme'), 'D500') |
||
5090 | self.assertEqual(pshp_soundex_last('MacNeil'), 'M400') |
||
5091 | self.assertEqual(pshp_soundex_last('McNeil'), 'M400') |
||
5092 | self.assertEqual(pshp_soundex_last('Edwards'), 'A353') |
||
5093 | self.assertEqual(pshp_soundex_last('Gin'), 'J500') |
||
5094 | self.assertEqual(pshp_soundex_last('Cillian'), 'S450') |
||
5095 | self.assertEqual(pshp_soundex_last('Christopher'), 'K523') |
||
5096 | self.assertEqual(pshp_soundex_last('Carme'), 'K500') |
||
5097 | self.assertEqual(pshp_soundex_last('Knight'), 'N230') |
||
5098 | self.assertEqual(pshp_soundex_last('Phillip'), 'F410') |
||
5099 | self.assertEqual(pshp_soundex_last('Wein'), 'V500') |
||
5100 | self.assertEqual(pshp_soundex_last('Wagner', german=True), 'V255') |
||
5101 | self.assertEqual(pshp_soundex_last('Pence'), 'P500') |
||
5102 | self.assertEqual(pshp_soundex_last('Less'), 'L000') |
||
5103 | self.assertEqual(pshp_soundex_last('Simpson'), 'S525') |
||
5104 | self.assertEqual(pshp_soundex_last('Samson'), 'S250') |
||
5105 | self.assertEqual(pshp_soundex_last('Lang'), 'L500') |
||
5106 | self.assertEqual(pshp_soundex_last('Hagan'), 'H500') |
||
5107 | self.assertEqual(pshp_soundex_last('Cartes', german=True), 'K500') |
||
5108 | self.assertEqual(pshp_soundex_last('Kats', german=True), 'K000') |
||
5109 | self.assertEqual(pshp_soundex_last('Schultze', german=True), 'S400') |
||
5110 | self.assertEqual(pshp_soundex_last('Alze', german=True), 'A400') |
||
5111 | self.assertEqual(pshp_soundex_last('Galz', german=True), 'G400') |
||
5112 | self.assertEqual(pshp_soundex_last('Alte', german=True), 'A400') |
||
5113 | self.assertEqual(pshp_soundex_last('Alte', maxlength=None), 'A43') |
||
5114 | self.assertEqual(pshp_soundex_last('Altemaier', maxlength=None), |
||
5115 | 'A4355') |
||
5116 | |||
5117 | def test_pshp_soundex_first(self): |
||
5118 | """Test abydos.phonetic.pshp_soundex_first.""" |
||
5119 | # Base case |
||
5120 | self.assertEqual(pshp_soundex_first(''), '0000') |
||
5121 | |||
5122 | # Examples given in defining paper (Hershberg, et al. 1976) |
||
5123 | self.assertEqual(pshp_soundex_first('JAMES'), 'J700') |
||
5124 | self.assertEqual(pshp_soundex_first('JOHN'), 'J500') |
||
5125 | self.assertEqual(pshp_soundex_first('PAT'), 'P700') |
||
5126 | self.assertEqual(pshp_soundex_first('PETER'), 'P300') |
||
5127 | |||
5128 | # Additions for coverage |
||
5129 | self.assertEqual(pshp_soundex_first('Giles'), 'J400') |
||
5130 | self.assertEqual(pshp_soundex_first('Cy'), 'S000') |
||
5131 | self.assertEqual(pshp_soundex_first('Chris'), 'K500') |
||
5132 | self.assertEqual(pshp_soundex_first('Caleb'), 'K400') |
||
5133 | self.assertEqual(pshp_soundex_first('Knabe'), 'N100') |
||
5134 | self.assertEqual(pshp_soundex_first('Phil'), 'F400') |
||
5135 | self.assertEqual(pshp_soundex_first('Wieland'), 'V400') |
||
5136 | self.assertEqual(pshp_soundex_first('Wayne', german=True), 'V500') |
||
5137 | self.assertEqual(pshp_soundex_first('Christopher', maxlength=None), |
||
5138 | 'K5') |
||
5139 | self.assertEqual(pshp_soundex_first('Asdaananndsjsjasd', |
||
5140 | maxlength=None), 'A23553223') |
||
5141 | self.assertEqual(pshp_soundex_first('Asdaananndsjsjasd'), 'A235') |
||
5142 | |||
5143 | |||
5144 | class HenryCodeTestCases(unittest.TestCase): |
||
5145 | """Test Henry Code functions. |
||
5146 | |||
5147 | test cases for abydos.phonetic.henry_early |
||
5148 | """ |
||
5149 | |||
5150 | def test_henry_early(self): |
||
5151 | """Test abydos.phonetic.henry_early.""" |
||
5152 | # Base case |
||
5153 | self.assertEqual(henry_early(''), '') |
||
5154 | |||
5155 | # Examples from Legare 1972 paper |
||
5156 | self.assertEqual(henry_early('Descarry'), 'DKR') |
||
5157 | self.assertEqual(henry_early('Descaries'), 'DKR') |
||
5158 | self.assertEqual(henry_early('Campo'), 'KP') |
||
5159 | self.assertEqual(henry_early('Campot'), 'KP') |
||
5160 | self.assertEqual(henry_early('Gausselin'), 'GSL') |
||
5161 | self.assertEqual(henry_early('Gosselin'), 'GSL') |
||
5162 | self.assertEqual(henry_early('Bergeron'), 'BRJ') |
||
5163 | self.assertEqual(henry_early('Bergereau'), 'BRJ') |
||
5164 | self.assertEqual(henry_early('Bosseron'), 'BSR') |
||
5165 | self.assertEqual(henry_early('Cicire'), 'SSR') |
||
5166 | self.assertEqual(henry_early('Lechevalier'), 'LCV') |
||
5167 | self.assertEqual(henry_early('Chevalier'), 'CVL') |
||
5168 | self.assertEqual(henry_early('Peloy'), 'PL') |
||
5169 | self.assertEqual(henry_early('Beloy'), 'BL') |
||
5170 | self.assertEqual(henry_early('Beret'), 'BR') |
||
5171 | self.assertEqual(henry_early('Benet'), 'BN') |
||
5172 | self.assertEqual(henry_early('Turcot'), 'TRK') |
||
5173 | self.assertEqual(henry_early('Turgot'), 'TRG') |
||
5174 | self.assertEqual(henry_early('Vigier'), 'VJ') |
||
5175 | self.assertEqual(henry_early('Vigiere'), 'VJR') |
||
5176 | self.assertEqual(henry_early('Dodin'), 'DD') |
||
5177 | self.assertEqual(henry_early('Dodelin'), 'DDL') |
||
5178 | |||
5179 | # Tests to complete coverage |
||
5180 | self.assertEqual(henry_early('Anil'), 'ANL') |
||
5181 | self.assertEqual(henry_early('Emmanuel'), 'AMN') |
||
5182 | self.assertEqual(henry_early('Ainu'), 'EN') |
||
5183 | self.assertEqual(henry_early('Oeuf'), 'OF') |
||
5184 | self.assertEqual(henry_early('Yves'), 'IV') |
||
5185 | self.assertEqual(henry_early('Yo'), 'I') |
||
5186 | self.assertEqual(henry_early('Umman'), 'EM') |
||
5187 | self.assertEqual(henry_early('Omman'), 'OM') |
||
5188 | self.assertEqual(henry_early('Zoe'), 'S') |
||
5189 | self.assertEqual(henry_early('Beauchamp'), 'BCP') |
||
5190 | self.assertEqual(henry_early('Chloe'), 'KL') |
||
5191 | self.assertEqual(henry_early('Gerard'), 'JRR') |
||
5192 | self.assertEqual(henry_early('Agnes'), 'ANN') |
||
5193 | self.assertEqual(henry_early('Pinot'), 'PN') |
||
5194 | self.assertEqual(henry_early('Philo'), 'FL') |
||
5195 | self.assertEqual(henry_early('Quisling'), 'GL') |
||
5196 | self.assertEqual(henry_early('Qualite'), 'KLT') |
||
5197 | self.assertEqual(henry_early('Sainte-Marie'), 'XMR') |
||
5198 | self.assertEqual(henry_early('Saint-Jean'), 'XJ') |
||
5199 | self.assertEqual(henry_early('Ste-Marie'), 'XMR') |
||
5200 | self.assertEqual(henry_early('St-Jean'), 'XJ') |
||
5201 | self.assertEqual(henry_early('Cloe'), 'KL') |
||
5202 | self.assertEqual(henry_early('Ahch-To'), 'AKT') |
||
5203 | self.assertEqual(henry_early('Zdavros'), 'SDV') |
||
5204 | self.assertEqual(henry_early('Sdavros'), 'DVR') |
||
5205 | self.assertEqual(henry_early('Coulomb'), 'KLB') |
||
5206 | self.assertEqual(henry_early('Calm'), 'K') |
||
5207 | self.assertEqual(henry_early('Omnia'), 'ON') |
||
5208 | self.assertEqual(henry_early('Ramps'), 'RPS') |
||
5209 | self.assertEqual(henry_early('Renault'), 'RN') |
||
5210 | self.assertEqual(henry_early('Czech'), 'CSK') |
||
5211 | self.assertEqual(henry_early('Imran'), 'ER') |
||
5212 | self.assertEqual(henry_early('Christopher', maxlength=None), 'KRXF') |
||
5213 | |||
5214 | |||
5215 | class NorphoneTestCases(unittest.TestCase): |
||
5216 | """Test Norphone functions. |
||
5217 | |||
5218 | test cases for abydos.phonetic.norphone |
||
5219 | """ |
||
5220 | |||
5221 | def test_norphone(self): |
||
5222 | """Test abydos.phonetic.norphone.""" |
||
5223 | # Base case |
||
5224 | self.assertEqual(norphone(''), '') |
||
5225 | |||
5226 | # Examples given at |
||
5227 | # https://github.com/larsga/Duke/blob/master/duke-core/src/test/java/no/priv/garshol/duke/comparators/NorphoneComparatorTest.java |
||
5228 | self.assertEqual(norphone('Aarestad'), norphone('\u00C5rrestad')) |
||
5229 | self.assertEqual(norphone('Andreasen'), norphone('Andreassen')) |
||
5230 | self.assertEqual(norphone('Arntsen'), norphone('Arntzen')) |
||
5231 | self.assertEqual(norphone('Bache'), norphone('Bakke')) |
||
5232 | self.assertEqual(norphone('Frank'), norphone('Franck')) |
||
5233 | self.assertEqual(norphone('Christian'), norphone('Kristian')) |
||
5234 | self.assertEqual(norphone('Kielland'), norphone('Kjelland')) |
||
5235 | self.assertEqual(norphone('Krogh'), norphone('Krog')) |
||
5236 | self.assertEqual(norphone('Krog'), norphone('Krohg')) |
||
5237 | self.assertEqual(norphone('Jendal'), norphone('Jendahl')) |
||
5238 | self.assertEqual(norphone('Jendal'), norphone('Hjendal')) |
||
5239 | self.assertEqual(norphone('Jendal'), norphone('Gjendal')) |
||
5240 | self.assertEqual(norphone('Vold'), norphone('Wold')) |
||
5241 | self.assertEqual(norphone('Thomas'), norphone('Tomas')) |
||
5242 | self.assertEqual(norphone('Aamodt'), norphone('Aamot')) |
||
5243 | self.assertEqual(norphone('Aksel'), norphone('Axel')) |
||
5244 | self.assertEqual(norphone('Kristoffersen'), norphone('Christophersen')) |
||
5245 | self.assertEqual(norphone('Voll'), norphone('Vold')) |
||
5246 | self.assertEqual(norphone('Granli'), norphone('Granlid')) |
||
5247 | self.assertEqual(norphone('Gjever'), norphone('Giever')) |
||
5248 | self.assertEqual(norphone('Sannerhaugen'), norphone('Sanderhaugen')) |
||
5249 | self.assertEqual(norphone('Jahren'), norphone('Jaren')) |
||
5250 | self.assertEqual(norphone('Amundsrud'), norphone('Amundsr\u00F8d')) |
||
5251 | self.assertEqual(norphone('Karlson'), norphone('Carlson')) |
||
5252 | |||
5253 | # Additional tests to increase coverage |
||
5254 | self.assertEqual(norphone('Århus'), 'ÅRHS') |
||
5255 | self.assertEqual(norphone('Skyrim'), 'XRM') |
||
5256 | self.assertEqual(norphone('kyss'), 'XS') |
||
5257 | self.assertEqual(norphone('Äthelwulf'), 'ÆTLVLF') |
||
5258 | self.assertEqual(norphone('eit'), 'ÆT') |
||
5259 | self.assertEqual(norphone('Öl'), 'ØL') |
||
5260 | |||
5261 | # test cases by larsga (the algorithm's author) posted to Reddit |
||
5262 | # https://www.reddit.com/r/norge/comments/vksb5/norphone_mitt_forslag_til_en_norsk_soundex_vel/ |
||
5263 | # modified, where necessary to match the "not implemented" rules |
||
5264 | # and rule added after the Reddit post |
||
5265 | reddit_tests = (('MKLSN', ('MICHALSEN', 'MIKKELSEN', 'MIKALSEN', |
||
5266 | 'MICHAELSEN', 'MIKAELSEN', 'MICKAELSEN', |
||
5267 | 'MICHELSEN', 'MIKELSEN')), |
||
5268 | ('BRKR', ('BERGER', 'BORGERUD', 'BURGER', 'BORGER', |
||
5269 | 'BORGAR', 'BIRGER', 'BRAGER', 'BERGERUD')), |
||
5270 | ('TMS', ('TOMMAS', 'THOMAS', 'THAMS', 'TOUMAS', |
||
5271 | 'THOMMAS', 'TIMMS', 'TOMAS', 'TUOMAS')), |
||
5272 | ('HLR', ('HOLER', 'HELLERUD', 'HALLRE', 'HOLLERUD', |
||
5273 | 'HILLER', 'HALLERUD', 'HOLLER', 'HALLER')), |
||
5274 | ('MS', ('MASS', 'MMS', 'MSS', 'MOES', 'MEZZO', 'MESA', |
||
5275 | 'MESSE', 'MOSS')), |
||
5276 | ('HRST', ('HIRSTI', 'HAARSETH', 'HAARSTAD', 'HARSTAD', |
||
5277 | 'HARESTUA', 'HERSETH', 'HERSTAD', |
||
5278 | 'HERSTUA')), |
||
5279 | ('SVN', ('SWANN', 'SVENI', 'SWAN', 'SVEN', 'SVEIN', |
||
5280 | 'SVEEN', 'SVENN', 'SVANE')), |
||
5281 | ('SLT', ('SELTE', 'SALT', 'SALTE', 'SLOTT', 'SLAATTO', |
||
5282 | 'SLETT', 'SLETTA', 'SLETTE')), |
||
5283 | ('JNSN', ('JANSSEN', 'JANSEN', 'JENSEN', 'JONASSEN', |
||
5284 | 'JANSON', 'JONSON', 'JENSSEN', 'JONSSON')), |
||
5285 | ('ANRSN', ('ANDRESSEN', 'ANDERSSON', 'ANDRESEN', |
||
5286 | 'ANDREASSEN', 'ANDERSEN', 'ANDERSON', |
||
5287 | 'ANDORSEN', 'ANDERSSEN')), |
||
5288 | ('BRK', ('BREKKE', 'BORCH', 'BRAKKE', 'BORK', 'BRECKE', |
||
5289 | 'BROCH', 'BRICK', 'BRUK')), |
||
5290 | ('LN', ('LINDE', 'LENDE', 'LUND', 'LAND', 'LINDA', |
||
5291 | 'LANDE', 'LIND', 'LUNDE')), |
||
5292 | ('SF', ('SOPHIE', 'SFE', 'SEFF', 'SEAFOOD', 'SOFIE', |
||
5293 | 'SAFE', 'SOFI', 'SOPHIA')), |
||
5294 | ('BRST', ('BRUASET', 'BUERSTAD', 'BARSTAD', 'BAARSTAD', |
||
5295 | 'BRUSETH', 'BERSTAD', 'BORSTAD', 'BRUSTAD')), |
||
5296 | ('OLSN', ('OHLSSON', 'OLESEN', 'OLSSON', 'OLAUSSON', |
||
5297 | 'OLAUSEN', 'OLAUSSEN', 'OLSEN', 'OLSON')), |
||
5298 | ('MKL', ('MIKAEL', 'MICHELA', 'MEIKLE', 'MIKAL', |
||
5299 | 'MIKKEL', 'MICHEL', 'MICHAL', 'MICHAEL')), |
||
5300 | ('HR', ('HEIER', 'HAR', 'HEER', 'HARRY', 'HEIR', |
||
5301 | 'HURRE', 'HERO', 'HUURRE')), |
||
5302 | ('VLM', ('VILLUM', 'WOLLUM', 'WILLIAM', 'WILLAM', |
||
5303 | 'WALLEM', 'WILLUM', 'VALUM', 'WILMO')), |
||
5304 | ('SNS', ('SYNNES', 'SINUS', 'SNUS', 'SNEIS', 'SANNES', |
||
5305 | 'SUNAAS', 'SUNNAAS', 'SAINES')), |
||
5306 | ('SNL', ('SANDAL', 'SANDAHL', 'SUNDEL', 'SANDLI', |
||
5307 | 'SUNNDAL', 'SANDELL', 'SANDLIE', 'SUNDAL')), |
||
5308 | ('VK', ('VEKA', 'VIKA', 'WIIK', 'WOK', 'WIKE', 'WEEK', |
||
5309 | 'VIK', 'VIAK')), |
||
5310 | ('MTS', ('METSO', 'MOTHES', 'MATHIAS', 'MATHIS', |
||
5311 | 'MATTIS', 'MYTHES', 'METOS', 'MATS'))) |
||
5312 | for encoded, names in reddit_tests: |
||
5313 | for name in names: |
||
5314 | self.assertEqual(encoded, norphone(name)) |
||
5315 | |||
5316 | |||
5317 | class DolbyTestCases(unittest.TestCase): |
||
5318 | """Test Dolby functions. |
||
5319 | |||
5320 | test cases for abydos.phonetic.dolby |
||
5321 | """ |
||
5322 | |||
5323 | def test_dolby(self): |
||
5324 | """Test abydos.phonetic.dolby.""" |
||
5325 | # Base case |
||
5326 | self.assertEqual(dolby(''), '') |
||
5327 | |||
5328 | # Tests from Dolby (1970) pp. 264--274 |
||
5329 | # https://ejournals.bc.edu/ojs/index.php/ital/article/download/5259/4734 |
||
5330 | # Checked against Cunningham, et al. (1969) pp. 127--136, as needed. |
||
5331 | # https://files.eric.ed.gov/fulltext/ED029679.pdf |
||
5332 | test_cases = (('*BL', 'Abel', 'Abele', 'Abell', 'Able'), |
||
5333 | ('*BRMS', 'Abrahams', 'Abrams'), |
||
5334 | ('*BRMSN', 'Abrahamson', 'Abramson'), |
||
5335 | ('*D', 'Eddy', 'Eddie'), |
||
5336 | ('*DMNS', 'Edmonds', 'Edmunds'), |
||
5337 | ('*DMNSN', 'Edmondson', 'Edmundson'), |
||
5338 | ('*DMS', 'Adams', 'Addems'), |
||
5339 | ('*GN', 'Eagen', 'Egan', 'Eggen'), |
||
5340 | ('*GR', '!Jaeger', 'Yaeger', 'Yeager'), |
||
5341 | ('*KN', 'Aiken', 'Aikin', 'Aitken'), |
||
5342 | ('*KNS', 'Adkins', 'Akins'), |
||
5343 | ('*KR', 'Acker', 'Aker'), |
||
5344 | ('*KR', 'Eckard', 'Eckardt', 'Eckart', 'Eckert', |
||
5345 | 'Eckhardt'), |
||
5346 | ('*KS', 'Oakes', 'Oaks', 'Ochs'), |
||
5347 | ('*LBRD', 'Albright', 'Allbright'), |
||
5348 | ('*LD', 'Elliot', 'Elliott'), |
||
5349 | ('*LN', 'Allan', 'Allen', 'Allyn'), |
||
5350 | ('*LSN', 'Ohlsen', 'Olesen', 'Olsen', 'Olson', 'Olsson'), |
||
5351 | ('*LVR', 'Oliveira', 'Olivera', 'Olivero'), |
||
5352 | ('*MS', 'Ames', 'Eames'), |
||
5353 | ('*NGL', 'Engel', 'Engle', 'Ingle'), |
||
5354 | ('*NL', 'O\'Neal', 'O\'Neil', 'O\'Neill'), |
||
5355 | ('*NRS', 'Andrews', 'Andrus'), |
||
5356 | ('*NRSN', 'Andersen', 'Anderson', 'Andreasen'), |
||
5357 | ('*NS', 'Ennis', 'Enos'), |
||
5358 | # Corrected: 'Enrichsen' below was an error |
||
5359 | # (It's correct in Cunningham, et al. 1969.) |
||
5360 | ('*RKSN', 'Erichsen', 'Erickson', 'Ericson', 'Ericsson', |
||
5361 | 'Eriksen'), |
||
5362 | ('*RL', 'Earley', 'Early'), |
||
5363 | ('*RN', 'Erwin', 'Irwin'), |
||
5364 | ('*RNS', 'Aarons', 'Ahrends', 'Ahrens', 'Arens', |
||
5365 | 'Arentz', 'Arons'), |
||
5366 | ('*RS', 'Ayers', 'Ayres'), |
||
5367 | ('*RVN', 'Ervin', 'Ervine', 'Irvin', 'Irvine'), |
||
5368 | ('*RVNG', 'Erving', 'Irving'), |
||
5369 | ('*SBRN', 'Osborn', 'Osborne', 'Osbourne', 'Osburn'), |
||
5370 | ('B*D', 'Beatie', 'Beattie', 'Beatty', 'Beaty', |
||
5371 | 'Beedie'), |
||
5372 | ('B*DS', 'Betts', 'Betz'), |
||
5373 | ('B*KMN', 'Bachman', 'Bachmann', 'Backman'), |
||
5374 | ('B*L', 'Bailey', 'Baillie', 'Bailly', 'Baily', |
||
5375 | 'Bayley'), |
||
5376 | ('B*L', 'Beal', 'Beale', 'Beall', 'Biehl'), |
||
5377 | ('B*L', 'Belew', 'Ballou', 'Bellew'), |
||
5378 | ('B*L', 'Buhl', 'Buell'), |
||
5379 | ('B*L', 'Belle', 'Bell'), |
||
5380 | # Corrected: No reason for D to disappear |
||
5381 | ('B*LDN', 'Bolton', 'Boulton'), |
||
5382 | ('B*M', 'Baum', 'Bohm', 'Bohme'), |
||
5383 | ('B*MN', 'Bauman', 'Bowman'), |
||
5384 | ('B*N', 'Bain', 'Bane', 'Bayne'), |
||
5385 | ('B*ND', 'Bennet', 'Bennett'), |
||
5386 | ('B*R', 'Baer', 'Bahr', 'Baier', 'Bair', 'Bare', 'Bear', |
||
5387 | 'Beare', 'Behr', 'Beier', 'Bier', '!Bryer'), |
||
5388 | ('B*R', 'Barry', 'Beare', 'Beery', 'Berry'), |
||
5389 | ('B*R', 'Bauer', 'Baur', 'Bower'), |
||
5390 | ('B*R', 'Bird', 'Burd', 'Byrd'), |
||
5391 | ('B*RBR', 'Barbour', 'Barber'), |
||
5392 | ('B*RG', 'Berg', 'Bergh', 'Burge'), |
||
5393 | ('B*RGR', 'Berger', 'Burger'), |
||
5394 | ('B*RK', 'Boerke', 'Birk', 'Bourke', 'Burk', 'Burke'), |
||
5395 | ('B*RN', 'Burn', 'Byrne'), |
||
5396 | ('B*RNR', 'Bernard', 'Bernhard', 'Bernhardt', |
||
5397 | 'Bernhart'), |
||
5398 | ('B*RNS', 'Berns', 'Birns', 'Burns', 'Byrns', 'Byrnes'), |
||
5399 | ('B*RNSN', 'Bernstein', 'Bornstein'), |
||
5400 | # Corrected: 'RCH' -> 'RH' in rule 2 |
||
5401 | ('B*RS', 'Bertsch', '!Birch', '!Burch'), |
||
5402 | ('BL*KBRN', 'Blackburn', '!Blagburn'), |
||
5403 | ('BL*M', 'Blom', 'Bloom', 'Bluhm', 'Blum', 'Blume'), |
||
5404 | ('BR*D', 'Brode', 'Brodie', 'Brody'), |
||
5405 | ('BR*N', 'Braun', 'Brown', 'Browne'), |
||
5406 | ('BR*N', 'Brand', 'Brandt', 'Brant'), |
||
5407 | # Corrected: 'Diezt' -> 'D*SD', so reversed zt -> tz |
||
5408 | # (Correct in Cunningham, et al. 1969.) |
||
5409 | ('D*DS', 'Dietz', 'Ditz'), |
||
5410 | ('D*F', 'Duffie', 'Duffy'), |
||
5411 | ('D*GN', 'Dougan', 'Dugan', 'Duggan'), |
||
5412 | ('D*K', 'Dickey', 'Dicke'), |
||
5413 | ('D*KNSN', 'Dickenson', '!Dickerson', 'Dickinson', |
||
5414 | '!Dickison'), |
||
5415 | ('D*KSN', 'Dickson', 'Dixon', 'Dixson'), |
||
5416 | ('D*L', 'Dailey', 'Daily', 'Daley', 'Daly'), |
||
5417 | ('D*L', 'Dahl', 'Dahle', 'Dall', 'Doll'), |
||
5418 | ('D*L', 'Deahl', 'Deal', 'Diehl'), |
||
5419 | ('D*MN', 'Diamond', 'Dimond', 'Dymond'), |
||
5420 | ('D*N', 'Dean', 'Deane', 'Deen'), |
||
5421 | ('D*N', 'Denney', 'Denny'), |
||
5422 | ('D*N', 'Donahoo', 'Donahue', 'Donoho', 'Donohoe', |
||
5423 | 'Donohoo,'), |
||
5424 | ('D*N', 'Donohue', 'Dunnahoo'), |
||
5425 | ('D*N', 'Downey', 'Downie'), |
||
5426 | ('D*N', 'Dunn', 'Dunne'), |
||
5427 | ('D*NL', 'Donley', 'Donnelley', 'Donnelly'), |
||
5428 | ('D*R', 'Daugherty', 'Doherty', 'Dougherty'), |
||
5429 | ('D*R', 'Dyar', 'Dyer'), |
||
5430 | ('D*RM', 'Derham', 'Durham'), |
||
5431 | ('D*VDSN', 'Davidsen', 'Davidson', '!Davison'), |
||
5432 | ('D*VS', 'Davies', 'Davis'), |
||
5433 | ('DR*SL', 'Driscoll', 'Driskell'), |
||
5434 | ('F*', 'Fay', 'Fahay', 'Fahey'), |
||
5435 | ('F*FR', 'Fifer', 'Pfeffer', 'Pfeiffer'), |
||
5436 | ('F*GN', 'Fagan', 'Feigan', 'Fegan'), |
||
5437 | ('F*L', 'Feil', 'Pfeil'), |
||
5438 | # Corrected: T -> D after LD -> D |
||
5439 | ('F*L', 'Feld', 'Feldt', '!Felt'), |
||
5440 | ('F*LKNR', 'Faulkner', 'Falconer'), |
||
5441 | ('F*LPS', 'Philips', 'Phillips'), |
||
5442 | ('F*NGN', 'Finnegan', 'Finnigan'), |
||
5443 | ('F*NL', 'Finlay', 'Finley'), |
||
5444 | ('F*RL', 'Farrell', 'Ferrell'), |
||
5445 | ('F*RR', 'Ferrara', 'Ferreira', 'Ferriera'), |
||
5446 | # Corrected: No reason for S to be eliminated |
||
5447 | ('F*RSR', 'Foerster', 'Forester', 'Forrester', |
||
5448 | 'Forster'), |
||
5449 | ('F*RS', 'Forrest', 'Forest'), |
||
5450 | ('F*RS', 'Faris', 'Farriss', 'Ferris', 'Ferriss'), |
||
5451 | ('F*RS', 'First', 'Fuerst', 'Furst'), |
||
5452 | ('F*SR', 'Fischer', 'Fisher'), |
||
5453 | ('FL*N', 'Flinn', 'Flynn'), |
||
5454 | ('FL*NGN', 'Flanagan', 'Flanigan', 'Flannigan'), |
||
5455 | ('FR*', 'Frei', 'Frey', 'Fry', 'Frye'), |
||
5456 | ('FR*DMN', 'Freedman', 'Friedman'), |
||
5457 | # Corrected: Fredickson -> Fredrickson |
||
5458 | # (Correct in Cunningham, et al. 1969.) |
||
5459 | ('FR*DRKSN', 'Frederickson', 'Frederiksen', |
||
5460 | 'Fredrickson', 'Fredriksson'), |
||
5461 | # Corrected: NK would not reduce to K because of rule 4 |
||
5462 | ('FR*NK', 'Franck', 'Frank'), |
||
5463 | ('FR*NS', 'France', 'Frantz', 'Franz'), |
||
5464 | # Corrected: vowel deletion happens after double |
||
5465 | # consonant deletion |
||
5466 | ('FR*NSS', 'Frances', 'Francis'), |
||
5467 | ('FR*S', 'Freeze', 'Freese', 'Fries'), |
||
5468 | ('FR*SR', 'Fraser', 'Frasier', 'Frazer', 'Frazier'), |
||
5469 | ('G*D', 'Good', 'Goode'), |
||
5470 | ('G*DS', 'Getz', 'Goetz', 'Goetze'), |
||
5471 | ('G*F', 'Goff', 'Gough'), |
||
5472 | ('G*L', 'Gold', 'Goold', 'Gould'), |
||
5473 | ('G*LMR', 'Gilmer', 'Gilmore', 'Gilmour'), |
||
5474 | ('G*LR', 'Gallagher', 'Gallaher', 'Galleher'), |
||
5475 | ('G*MS', 'Gomes', 'Gomez'), |
||
5476 | ('G*NR', 'Guenther', 'Gunther'), |
||
5477 | ('G*NSLS', 'Gonzales', 'Gonzalez'), |
||
5478 | # Corrected: 'Consalves' for the first case likely a typo |
||
5479 | # (Correct in Cunningham, et al. 1969.) |
||
5480 | ('G*NSLVS', 'Gonsalves', 'Gonzalves'), |
||
5481 | ('G*RD', 'Garratt', 'Garrett'), |
||
5482 | ('G*RD', 'Garrity', 'Geraghty', 'Geraty', 'Gerrity'), |
||
5483 | ('G*RN', 'Gorden', 'Gordohn', 'Gordon'), |
||
5484 | ('G*RNR', 'Gardiner', 'Gardner', 'Gartner'), |
||
5485 | ('G*RR', 'Garrard', 'Gerard', 'Gerrard', 'Girard'), |
||
5486 | ('G*S', 'Gauss', 'Goss'), |
||
5487 | ('GR*', 'Gray', 'Grey'), |
||
5488 | ('GR*FD', 'Griffeth', 'Griffith'), |
||
5489 | ('GR*N', 'Green', 'Greene'), |
||
5490 | ('GR*S', 'Gros', 'Grose', 'Gross'), |
||
5491 | ('H*D', 'Hyde', 'Heidt'), |
||
5492 | ('H*F', 'Hoff', 'Hough', 'Huff'), |
||
5493 | ('H*FMN', 'Hoffman', 'Hoffmann', 'Hofman', 'Hofmann', |
||
5494 | 'Huffman'), |
||
5495 | ('H*G', 'Hoag', 'Hoge', 'Hogue'), |
||
5496 | ('H*GN', 'Hagan', 'Hagen'), |
||
5497 | ('H*K', 'Hauch', 'Hauck', 'Hauk', 'Hauke'), |
||
5498 | ('H*KSN', 'Hutcheson', 'Hutchison'), |
||
5499 | ('H*L', 'Holley', 'Holly'), |
||
5500 | ('H*L', 'Holl', 'Hall'), |
||
5501 | ('H*L', 'Halley', 'Haley'), |
||
5502 | ('H*L', 'Haile', 'Hale'), |
||
5503 | ('H*LD', 'Holiday', 'Halliday', 'Holladay', 'Holliday'), |
||
5504 | ('H*LG', 'Helwig', 'Hellwig'), |
||
5505 | ('H*LM', 'Holm', '!Home'), |
||
5506 | ('H*LMS', 'Holmes', '!Homes'), |
||
5507 | ('H*LN', 'Highland', 'Hyland'), |
||
5508 | ('H*M', 'Ham', 'Hamm'), |
||
5509 | ('H*MR', 'Hammar', 'Hammer'), |
||
5510 | ('H*N', 'Hanna', 'Hannah'), |
||
5511 | ('H*N', 'Hahn', 'Hahne', 'Hann', 'Haun'), |
||
5512 | ('H*NN', 'Hanan', 'Hannan', 'Hannon'), |
||
5513 | ('H*NRKS', 'Hendricks', 'Hendrix', 'Henriques'), |
||
5514 | ('H*NRKSN', 'Hendrickson', 'Henriksen', 'Henrikson'), |
||
5515 | ('H*NS', 'Heintz', 'Heinz', 'Heinze', 'Hindes', 'Hinds', |
||
5516 | 'Hines', 'Hinze'), |
||
5517 | ('H*NS', 'Haines', 'Haynes'), |
||
5518 | ('H*NSN', 'Henson', 'Hansen', 'Hanson', 'Hanssen', |
||
5519 | 'Hansson', 'Hanszen'), |
||
5520 | ('H*R', 'Herd', 'Heard', 'Hird', 'Hurd'), |
||
5521 | ('H*R', 'Hart', 'Hardt', 'Harte', 'Heart'), |
||
5522 | ('H*R', 'Hare', 'Hair'), |
||
5523 | ('H*R', 'Hardey', 'Hardie', 'Hardy'), |
||
5524 | ('H*RMN', 'Hartman', 'Hardmen', 'Hardman', 'Hartmann'), |
||
5525 | ('H*RMN', 'Herman', 'Hermann', 'Herrmann'), |
||
5526 | ('H*RMN', 'Harman', 'Harmon'), |
||
5527 | ('H*RN', 'Heron', 'Herrin', 'Herron'), |
||
5528 | ('H*RN', 'Hardin', 'Harden'), |
||
5529 | ('H*RN', 'Horn', 'Horne'), |
||
5530 | ('H*RNGDN', 'Herrington', 'Harrington'), |
||
5531 | ('H*S', 'Haas', 'Haase', 'Hasse'), |
||
5532 | ('H*S', 'Howes', 'House', 'Howse'), |
||
5533 | ('H*S', 'Hays', 'Hayes'), |
||
5534 | ('H*SN', 'Houston', 'Huston'), |
||
5535 | ('H*VR', 'Hoover', 'Hover'), |
||
5536 | ('J*', 'Jew', 'Jue'), |
||
5537 | ('J*FR', 'Jeffery', 'Jeffrey'), |
||
5538 | ('J*FRS', 'Jefferies', 'Jefferis', 'Jefferys', |
||
5539 | 'Jeffreys'), |
||
5540 | ('J*KB', 'Jacobi', 'Jacoby'), |
||
5541 | ('J*KBSN', 'Jacobsen', 'Jacobson', 'Jackobsen'), |
||
5542 | ('J*KS', 'Jacques', 'Jacks', 'Jaques'), |
||
5543 | ('J*L', 'Jewell', 'Juhl'), |
||
5544 | ('J*MS', 'Jaimes', 'James'), |
||
5545 | ('J*MSN', 'Jameson', 'Jamieson', 'Jamison'), |
||
5546 | ('J*NSN', 'Jahnsen', 'Jansen', 'Jansohn', 'Janssen', |
||
5547 | 'Jansson,', 'Janzen', 'Jensen', 'Jenson'), |
||
5548 | ('J*S', 'Joice', 'Joyce'), |
||
5549 | ('K*', 'Kay', 'Kaye'), |
||
5550 | ('K*F', 'Coffee', 'Coffey'), |
||
5551 | ('K*FMN', 'Coffman', 'Kauffman', 'Kaufman', 'Kaufmann'), |
||
5552 | ('K*K', 'Cook', 'Cooke', 'Koch', 'Koche'), |
||
5553 | ('K*K', 'Cook', 'Cooke', 'Koch', 'Koche'), |
||
5554 | ('K*L', 'Cole', 'Kohl', 'Koll'), |
||
5555 | ('K*L', 'Cole', 'Kohl', 'Koll'), |
||
5556 | ('K*L', 'Kelley', 'Kelly'), |
||
5557 | ('K*LMN', 'Coleman', 'Colman'), |
||
5558 | ('K*LR', 'Koehler', 'Koeller', 'Kohler', 'Koller'), |
||
5559 | ('K*MBRLN', 'Chamberlain', 'Chamberlin'), |
||
5560 | ('K*MBS', 'Combs', 'Coombes', 'Coombs'), |
||
5561 | ('K*MP', 'Camp', 'Kampe', 'Kampf'), |
||
5562 | ('K*MPS', 'Campos', 'Campus'), |
||
5563 | ('K*N', 'Cahn', 'Conn', 'Kahn'), |
||
5564 | ('K*N', 'Cahen', 'Cain', 'Caine', 'Cane', 'Kain', |
||
5565 | 'Kane'), |
||
5566 | ('K*N', 'Chin', 'Chinn'), |
||
5567 | ('K*N', 'Chaney', 'Cheney'), |
||
5568 | ('K*N', 'Coen', 'Cohan', 'Cohen', 'Cohn', 'Cone', |
||
5569 | 'Koehn', 'Kahn'), |
||
5570 | ('K*N', 'Coon', 'Kuhn', 'Kuhne'), |
||
5571 | ('K*N', 'Kenney', 'Kenny', 'Kinney'), |
||
5572 | ('K*NL', 'Conley', 'Conly', 'Connelly', 'Connolly'), |
||
5573 | ('K*NR', 'Conner', 'Connor'), |
||
5574 | ('K*NS', 'Coons', 'Koontz', 'Kuhns', 'Kuns', 'Kuntz', |
||
5575 | 'Kunz'), |
||
5576 | ('K*P', 'Coop', 'Co-op', 'Coope', 'Coupe', 'Koop'), |
||
5577 | ('K*PL', 'Chapel', 'Chapell', 'Chappel', 'Chappell', |
||
5578 | 'Chappelle', 'Chapple'), |
||
5579 | ('K*R', 'Carrie', 'Carey', 'Cary'), |
||
5580 | ('K*R', 'Corey', 'Cory'), |
||
5581 | ('K*R', 'Carr', 'Kar', 'Karr'), |
||
5582 | # Corrected: No reason to strip S |
||
5583 | ('K*RS', 'Kurtz', 'Kurz'), |
||
5584 | ('K*R', 'Kehr', 'Ker', 'Kerr'), |
||
5585 | ('K*RD', 'Cartwright', 'Cortright'), |
||
5586 | # Corrected: No reason to strip D |
||
5587 | ('K*RLDN', 'Carleton', 'Carlton'), |
||
5588 | # Corrected: CE -> SE |
||
5589 | ('K*RN', 'Carney', '!Cerney', 'Kearney'), |
||
5590 | # Corrected: RC -> R |
||
5591 | ('K*RSNR', 'Kirschner', '!Kirchner'), |
||
5592 | ('K*S', 'Chace', 'Chase'), |
||
5593 | ('K*S', 'Cass', 'Kass'), |
||
5594 | ('K*S', 'Kees', 'Keyes', 'Keys'), |
||
5595 | ('K*SL', 'Cassel', 'Cassell', 'Castle'), |
||
5596 | ('K*SLR', 'Kesler', 'Kessler', 'Kestler'), |
||
5597 | ('K*SR', 'Kaiser', 'Kayser', 'Keizer', 'Keyser', |
||
5598 | 'Kieser', 'Kiser', 'Kizer'), |
||
5599 | ('KL*N', 'Cline', 'Klein', 'Kleine', 'Kline'), |
||
5600 | ('KL*RK', 'Clark', 'Clarke'), |
||
5601 | ('KL*SN', 'Claussen', 'Clausen', 'Clawson', 'Closson'), |
||
5602 | ('KR*', 'Crow', 'Crowe'), |
||
5603 | ('KR*GR', 'Krieger', 'Kroeger', 'Krueger', 'Kruger'), |
||
5604 | ('KR*MR', 'Creamer', 'Cramer', 'Kraemer', 'Kramer', |
||
5605 | 'Kremer'), |
||
5606 | ('KR*N', 'Craine', 'Crane'), |
||
5607 | ('KR*S', 'Christie', 'Christy', 'Kristee'), |
||
5608 | ('KR*S', 'Crouss', 'Kraus', 'Krausch', 'Krause', |
||
5609 | 'Krouse'), |
||
5610 | ('KR*S', 'Cross', 'Krost'), |
||
5611 | ('KR*S', 'Crews', 'Cruz', 'Kruse'), |
||
5612 | ('KR*SNSN', 'Christensen', 'Christiansen', |
||
5613 | 'Christianson'), |
||
5614 | ('L*', 'Loe', 'Loewe', 'Low', 'Lowe'), |
||
5615 | ('L*', 'Lea', 'Lee', '!Leigh'), |
||
5616 | ('L*D', 'Lloyd', 'Loyd'), |
||
5617 | ('L*DL', 'Litle', 'Littell', 'Little', 'Lytle'), |
||
5618 | ('L*DRMN', 'Ledterman', 'Letterman'), |
||
5619 | ('L*K', 'Leach', 'Leech', 'Leitch'), |
||
5620 | ('L*KS', 'Lucas', 'Lukas'), |
||
5621 | ('L*LN', 'Laughlin', 'Loughlin'), |
||
5622 | ('L*LR', 'Lawler', 'Lawlor'), |
||
5623 | ('L*MB', 'Lamb', '!Lamm'), |
||
5624 | ('L*MN', 'Lemen', 'Lemmon', 'Lemon'), |
||
5625 | ('L*MN', 'Layman', 'Lehman', 'Lehmann'), |
||
5626 | ('L*N', 'Lind', 'Lynd', 'Lynde'), |
||
5627 | ('L*N', 'Lion', 'Lyon'), |
||
5628 | ('L*N', 'Lin', 'Linn', 'Lynn', 'Lynne'), |
||
5629 | # Corrected: NG -> NG (!N) |
||
5630 | ('L*N', 'Lain', 'Laine', '!Laing', 'Lane', 'Layne'), |
||
5631 | ('L*NG', 'Lang', 'Lange'), |
||
5632 | ('L*NN', 'London', 'Lundin'), |
||
5633 | ('L*NS', 'Lindsay', 'Lindsey', '!Lindsley', '!Linsley'), |
||
5634 | ('L*R', 'Lawry', 'Lowery', 'Lowrey', 'Lowry'), |
||
5635 | ('L*RNS', 'Lawrence', 'Lowrance'), |
||
5636 | ('L*RNS', 'Laurence', 'Lawrance', 'Lawrence', 'Lorence', |
||
5637 | 'Lorenz'), |
||
5638 | ('L*RSN', 'Larsen', 'Larson'), |
||
5639 | ('L*S', 'Lewis', 'Louis', 'Luis', 'Luiz'), |
||
5640 | ('L*S', 'Lacey', 'Lacy'), |
||
5641 | ('L*SR', '!Leicester', 'Lester'), |
||
5642 | ('L*V', 'Levey', 'Levi', 'Levy'), |
||
5643 | ('L*VD', 'Leavett', 'Leavitt', 'Levit'), |
||
5644 | ('L*VL', 'Lavell', 'Lavelle', 'Leavelle', 'Loveall', |
||
5645 | 'Lovell'), |
||
5646 | ('L*VN', 'Lavin', 'Levin', 'Levine'), |
||
5647 | ('M*D', 'Mead', 'Meade'), |
||
5648 | # Corrected: RT*N -> R*N -> RN |
||
5649 | ('M*RN', '!Moretton', 'Morton'), |
||
5650 | ('M*DS', 'Mathews', 'Matthews'), |
||
5651 | ('M*DSN', 'Madison', 'Madsen', 'Matson', 'Matteson', |
||
5652 | 'Mattison', 'Mattson'), |
||
5653 | ('M*KL', 'Michael', 'Michel'), |
||
5654 | ('M*KM', 'Meacham', 'Mechem'), |
||
5655 | # Corrected: RQ*S -> RKS, not KS |
||
5656 | ('M*RKS', 'Marques', 'Marquez', 'Marquis', 'Marquiss'), |
||
5657 | # Corrected: RKS does not compress to KS |
||
5658 | ('M*RKS', 'Marcks', 'Marks', 'Marx'), |
||
5659 | ('M*LN', 'Maloney', 'Moloney', 'Molony'), |
||
5660 | ('M*LN', 'Mullan', 'Mullen', 'Mullin'), |
||
5661 | ('M*LR', 'Mallery', 'Mallory'), |
||
5662 | ('M*LR', 'Moeller', 'Moller', 'Mueller', 'Muller'), |
||
5663 | ('M*LR', 'Millar', 'Miller'), |
||
5664 | ('M*LS', 'Miles', 'Myles'), |
||
5665 | ('M*N', 'Mahan', 'Mann'), |
||
5666 | ('M*NR', 'Miner', 'Minor'), |
||
5667 | ('M*NR', 'Monroe', 'Munro'), |
||
5668 | ('M*NSN', 'Monson', 'Munson'), |
||
5669 | ('M*R', 'Murray', 'Murrey'), |
||
5670 | ('M*R', 'Maher', 'Maier', 'Mayer'), |
||
5671 | ('M*R', 'Mohr', 'Moor', 'Moore'), |
||
5672 | # Corrected: No reason to eliminate final S |
||
5673 | ('M*RS', 'Meyers', 'Myers'), |
||
5674 | ('M*R', 'Meier', 'Meyer', 'Mieir', 'Myhre'), |
||
5675 | ('M*RF', 'Murphey', 'Murphy'), |
||
5676 | ('M*RL', 'Merrell', 'Merrill'), |
||
5677 | ('M*RN', 'Marten', 'Martin', 'Martine', 'Martyn'), |
||
5678 | ('M*RS', 'Meyers', 'Myers'), |
||
5679 | ('M*RS', 'Maurice', 'Morris', 'Morse'), |
||
5680 | ('MK*', 'McCoy', 'McCaughey'), |
||
5681 | ('MK*', 'Magee', 'McGee', 'McGehee', 'McGhie'), |
||
5682 | ('MK*', 'Mackey', 'MacKay', 'Mackie', 'McKay'), |
||
5683 | ('MK*', 'McCue', '!McHugh'), |
||
5684 | ('MK*L', 'Magill', 'McGill'), |
||
5685 | ('MK*LF', 'McCollough', '!McCullah', 'McCullough'), |
||
5686 | ('MK*LM', 'McCallum', 'McCollum', 'McColm'), |
||
5687 | ('MK*N', 'McKenney', 'McKinney'), |
||
5688 | ('MK*NR', 'Macintyre', 'McEntire', 'Mcintire', |
||
5689 | 'Mcintyre'), |
||
5690 | ('MK*NS', 'MacKenzie', 'McKenzie'), |
||
5691 | ('MK*NS', 'Maginnis', 'McGinnis', 'McGuinness', |
||
5692 | 'Mcinnes', 'Mcinnis'), |
||
5693 | ('MK*R', 'Maguire', 'McGuire'), |
||
5694 | ('MK*R', 'McCarthy', 'McCarty'), |
||
5695 | ('MKD*NL', 'MacDonald', 'McDonald', 'McDonnell'), |
||
5696 | ('MKF*RLN', 'MacFarland', 'MacFarlane', 'McFarland', |
||
5697 | 'McFarlane'), |
||
5698 | ('MKF*RSN', 'MacPherson', 'McPherson'), |
||
5699 | ('MKL*D', 'MacLeod', 'McCloud', 'McLeod'), |
||
5700 | ('MKL*KLN', 'MacLachlan', 'Maclachlin', 'McLachlan', |
||
5701 | '!McLaughlin,', '!McLoughlin'), |
||
5702 | ('MKL*LN', 'McClellan', 'McClelland', 'McLellan'), |
||
5703 | ('MKL*N', 'McClain', 'McClaine', 'McLain', 'McLane'), |
||
5704 | ('MKL*N', 'MacLean', 'McClean', 'McLean'), |
||
5705 | ('MKL*S', 'McCloskey', 'McClosky', 'McCluskey'), |
||
5706 | ('MKM*LN', 'MacMillan', 'McMillan', 'McMillin'), |
||
5707 | ('MKN*L', 'MacNeal', 'McNeal', 'McNeil', 'McNeill'), |
||
5708 | ('MKR*D', 'Magrath', 'McGrath'), |
||
5709 | ('N*KL', 'Nichol', 'Nicholl', 'Nickel', 'Nickle', |
||
5710 | 'Nicol', 'Nicoll'), |
||
5711 | ('N*KLS', 'Nicholls', 'Nichols', 'Nickels', 'Nickles', |
||
5712 | 'Nicols'), |
||
5713 | ('N*KLS', 'Nicholas', 'Nicolas'), |
||
5714 | ('N*KLSN', 'Nicholsen', 'Nicholson', 'Nicolaisen', |
||
5715 | 'Nicolson'), |
||
5716 | ('N*KSN', 'Nickson', 'Nixon'), |
||
5717 | ('N*L', 'Neal', 'Neale', 'Neall', 'Neel', 'Neil', |
||
5718 | 'Neill'), |
||
5719 | ('N*LSN', 'Neilsen', 'Neilson', 'Nelsen', 'Nelson', |
||
5720 | 'Nielsen', 'Nielson,', 'Nilson', 'Nilssen', 'Nilsson'), |
||
5721 | ('N*MN', 'Neumann', 'Newman'), |
||
5722 | ('N*RS', 'Norris', 'Nourse'), |
||
5723 | ('N*SBD', 'Nesbit', 'Nesbitt', 'Nisbet'), |
||
5724 | ('P*D', 'Pettee', 'Petty'), |
||
5725 | ('P*DRSN', 'Peterson', 'Pederson', 'Pedersen', |
||
5726 | 'Petersen', 'Petterson'), |
||
5727 | ('P*G', 'Page', 'Paige'), |
||
5728 | ('P*LK', 'Polak', 'Pollack', 'Pollak', 'Pollock'), |
||
5729 | ('P*LSN', 'Polson', 'Paulsen', 'Paulson', 'Poulsen', |
||
5730 | 'Poulsson'), |
||
5731 | ('P*N', 'Paine', 'Payn', 'Payne'), |
||
5732 | ('P*R', 'Parry', 'Perry'), |
||
5733 | ('P*R', 'Parr', 'Paar'), |
||
5734 | ('P*RK', 'Park', 'Parke'), |
||
5735 | ('P*RKS', 'Parks', 'Parkes'), |
||
5736 | # Corrected: RC -> R |
||
5737 | ('P*R', 'Pierce', 'Pearce', 'Peirce', '!Piers'), |
||
5738 | ('P*RS', 'Parish', 'Parrish'), |
||
5739 | ('P*RS', 'Paris', 'Parris'), |
||
5740 | ('P*RSN', 'Pierson', 'Pearson', 'Pehrson', 'Peirson'), |
||
5741 | ('PR*KR', 'Prichard', 'Pritchard'), |
||
5742 | ('PR*NS', 'Prince', 'Prinz'), |
||
5743 | ('PR*R', 'Prior', 'Pryor'), |
||
5744 | ('R*', 'Roe', 'Rowe'), |
||
5745 | ('R*', 'Rae', 'Ray', 'Raye', 'Rea', 'Rey', 'Wray'), |
||
5746 | ('R*BNSN', 'Robinson', '!Robison'), |
||
5747 | ('R*D', 'Rothe', 'Roth'), |
||
5748 | ('R*D', 'Rudd', 'Rood', 'Rude'), |
||
5749 | ('R*D', 'Reed', 'Read', 'Reade', 'Reid'), |
||
5750 | ('R*DR', 'Rider', 'Ryder'), |
||
5751 | ('R*DS', 'Rhoades', 'Rhoads', 'Rhodes'), |
||
5752 | ('R*GN', 'Regan', 'Ragon', 'Reagan'), |
||
5753 | # Corrected: No reason to drop final S |
||
5754 | ('R*GRS', 'Rodgers', 'Rogers'), |
||
5755 | ('R*K', 'Richey', 'Ritchey', 'Ritchie'), |
||
5756 | ('R*K', 'Reich', 'Reiche'), |
||
5757 | ('R*KR', 'Reichardt', 'Richert', 'Rickard'), |
||
5758 | ('R*L', 'Reilley', 'Reilly', 'Reilli', 'Riley'), |
||
5759 | # Corrected: T -> D |
||
5760 | ('R*MNGDN', 'Remington', 'Rimington'), |
||
5761 | ('R*MR', 'Reamer', 'Reimer', 'Riemer', 'Rimmer'), |
||
5762 | ('R*MS', 'Ramsay', 'Ramsey'), |
||
5763 | ('R*N', 'Rhein', 'Rhine', 'Ryan'), |
||
5764 | ('R*NR', 'Reinhard', 'Reinhardt', 'Reinhart', |
||
5765 | 'Rhinehart', 'Rinehart'), |
||
5766 | ('R*S', 'Reas', 'Reece', 'Rees', 'Reese', 'Reis', |
||
5767 | 'Reiss', 'Ries'), |
||
5768 | ('R*S', '!Rauch', 'Rausch', '!Roach', '!Roche', 'Roush'), |
||
5769 | ('R*S', 'Rush', 'Rusch'), |
||
5770 | ('R*S', 'Russ', 'Rus'), |
||
5771 | ('R*VS', 'Reaves', 'Reeves'), |
||
5772 | ('S*BR', 'Seibert', 'Siebert'), |
||
5773 | ('S*FL', 'Schofield', 'Scofield'), |
||
5774 | ('S*FN', 'Stefan', 'Steffan', 'Steffen', 'Stephan', |
||
5775 | 'Stephen'), |
||
5776 | ('S*FNS', 'Steffens', 'Stephens', '!Stevens'), |
||
5777 | ('S*FNSN', 'Steffensen', 'Steffenson', 'Stephenson', |
||
5778 | '!Stevenson'), |
||
5779 | ('S*FR', 'Schaefer', 'Schaeffer', 'Schafer', 'Schaffer', |
||
5780 | 'Schafer,', 'Shaffer', 'Sheaffer'), |
||
5781 | ('S*FR', 'Stauffer', 'Stouffer'), |
||
5782 | ('S*GL', 'Siegal', 'Sigal'), |
||
5783 | ('S*GLR', 'Sigler', 'Ziegler'), |
||
5784 | ('S*K', 'Schuck', 'Shuck'), |
||
5785 | ('S*KS', 'Sachs', 'Sacks', 'Saks', 'Sax', 'Saxe'), |
||
5786 | ('S*L', 'Seeley', 'Seely', 'Seley'), |
||
5787 | ('S*L', 'Schell', 'Shell'), |
||
5788 | ('S*LR', 'Schuler', 'Schuller'), |
||
5789 | # Corrected: LD -> L precedes T -> D |
||
5790 | ('S*LDS', 'Schultz', 'Schultze', '!Schulz', '!Schulze', |
||
5791 | 'Shults', 'Shultz'), |
||
5792 | ('S*LV', 'Silva', 'Sylva'), |
||
5793 | ('S*LVR', 'Silveira', 'Silvera', 'Silveria'), |
||
5794 | ('S*MKR', 'Schomaker', 'Schumacher', 'Schumaker', |
||
5795 | 'Shoemaker,', 'Shumaker'), |
||
5796 | ('S*MN', 'Simon', 'Symon'), |
||
5797 | ('S*MN', 'Seaman', 'Seemann', 'Semon'), |
||
5798 | ('S*MRS', 'Somers', 'Sommars', 'Sommers', 'Summers'), |
||
5799 | ('S*MS', 'Simms', 'Sims'), |
||
5800 | ('S*N', 'Stein', 'Stine'), |
||
5801 | ('S*N', 'Sweeney', 'Sweeny', 'Sweney'), |
||
5802 | ('S*NR', 'Senter', 'Center'), |
||
5803 | ('S*NRS', 'Sanders', 'Saunders'), |
||
5804 | ('S*PR', 'Shepard', '!Shephard', '!Shepheard', |
||
5805 | '!Shepherd', 'Sheppard'), |
||
5806 | ('S*R', 'Stahr', 'Star', 'Starr'), |
||
5807 | ('S*R', 'Stewart', 'Stuart'), |
||
5808 | ('S*R', 'Storey', 'Story'), |
||
5809 | ('S*R', 'Saier', 'Sayre'), |
||
5810 | # Corrected: No reason to strip final S |
||
5811 | ('S*RS', 'Schwartz', 'Schwarz', 'Schwarze', 'Swartz'), |
||
5812 | ('S*RL', 'Schirle', 'Shirley'), |
||
5813 | ('S*RLNG', 'Sterling', 'Stirling'), |
||
5814 | ('S*RMN', 'Scheuermann', 'Schurman', 'Sherman'), |
||
5815 | ('S*RN', 'Stearn', 'Stern'), |
||
5816 | ('S*RR', 'Scherer', 'Shearer', 'Sharer', 'Sherer', |
||
5817 | 'Sheerer'), |
||
5818 | ('S*S', 'Sousa', 'Souza'), |
||
5819 | ('SM*D', 'Smith', 'Smyth', 'Smythe'), |
||
5820 | ('SM*D', 'Schmid', 'Schmidt', 'Schmit', 'Schmitt', |
||
5821 | 'Smit'), |
||
5822 | ('SN*DR', 'Schneider', 'Schnieder', 'Snaider', 'Snider', |
||
5823 | 'Snyder'), |
||
5824 | ('SN*L', 'Schnell', 'Snell'), |
||
5825 | ('SP*LNG', 'Spalding', 'Spaulding'), |
||
5826 | ('SP*R', 'Spear', 'Speer', '!Speirer'), |
||
5827 | # Corrected: No reason to strip final S |
||
5828 | ('SP*RS', 'Spears', 'Speers'), |
||
5829 | ('SR*DR', 'Schroder', 'Schroeder', 'Schroeter'), |
||
5830 | ('SR*DR', 'Schrader', 'Shrader'), |
||
5831 | # Corrected: Everywhere else, rule 3 applies to char 1 |
||
5832 | ('D*D', 'Tait', 'Tate'), |
||
5833 | ('D*MSN', 'Thomason', '!Thompson', 'Thomsen', 'Thomson', |
||
5834 | 'Tomson'), |
||
5835 | ('D*RL', 'Terrel', 'Terrell', 'Terrill'), |
||
5836 | ('DR*S', 'Tracey', 'Tracy'), |
||
5837 | ('V*L', 'Vail', 'Vaile', 'Vale'), |
||
5838 | ('V*L', 'Valley', 'Valle'), |
||
5839 | ('V*R', 'Vieira', 'Vierra'), |
||
5840 | ('W*D', 'White', 'Wight'), |
||
5841 | ('W*DKR', 'Whitacre', 'Whitaker', 'Whiteaker', |
||
5842 | 'Whittaker'), |
||
5843 | ('W*DL', 'Whiteley', 'Whitley'), |
||
5844 | ('W*DMN', 'Whitman', 'Wittman'), |
||
5845 | ('W*DR', 'Woodard', 'Woodward'), |
||
5846 | ('W*DRS', 'Waters', 'Watters'), |
||
5847 | ('W*GNR', 'Wagener', 'Waggener', 'Wagoner', 'Wagner', |
||
5848 | 'Wegner,', 'Waggoner'), |
||
5849 | ('W*L', 'Willey', 'Willi'), |
||
5850 | ('W*L', 'Wiley', 'Wylie'), |
||
5851 | ('W*L', 'Wahl', 'Wall'), |
||
5852 | ('W*LBR', 'Wilber', 'Wilbur'), |
||
5853 | ('W*LF', 'Wolf', 'Wolfe', 'Wolff', 'Woolf', 'Woulfe', |
||
5854 | 'Wulf', 'Wulff'), |
||
5855 | ('W*LKNS', 'Wilkens', 'Wilkins'), |
||
5856 | ('W*LKS', 'Wilkes', 'Wilks'), |
||
5857 | ('W*LN', 'Whalen', 'Whelan'), |
||
5858 | # Corrected: LD -> L precedes T -> D |
||
5859 | ('W*LDR', 'Walter', 'Walther', 'Wolter'), |
||
5860 | ('W*LDRS', 'Walters', 'Walthers', 'Wolters'), |
||
5861 | ('W*LS', 'Wallace', 'Wallis'), |
||
5862 | ('W*LS', 'Welch', 'Welsh'), |
||
5863 | ('W*LS', 'Welles', 'Wells'), |
||
5864 | ('W*LSN', 'Willson', 'Wilson'), |
||
5865 | ('W*N', 'Winn', 'Wynn', 'Wynne'), |
||
5866 | ('W*R', 'Worth', 'Wirth'), |
||
5867 | ('W*R', 'Ware', 'Wear', 'Weir', 'Wier'), |
||
5868 | ('W*RL', 'Wehrle', 'Wehrlie', 'Werle', 'Worley'), |
||
5869 | ('W*RNR', 'Warner', 'Werner'), |
||
5870 | ('W*S', 'Weis', 'Weiss', 'Wiese', 'Wise', 'Wyss'), |
||
5871 | ('W*SMN', 'Weismann', 'Weissman', 'Weseman', 'Wiseman,', |
||
5872 | 'Wismonn', 'Wissman')) |
||
5873 | |||
5874 | for tests in test_cases: |
||
5875 | result, names = tests[0], tests[1:] |
||
5876 | for name in names: |
||
5877 | if name[0] == '!': |
||
5878 | self.assertNotEqual(result, dolby(name[1:])) |
||
5879 | else: |
||
5880 | self.assertEqual(result, dolby(name)) |
||
5881 | |||
5882 | |||
5883 | class PhoneticSpanishTestCases(unittest.TestCase): |
||
5884 | """Test PhoneticSpanish functions. |
||
5885 | |||
5886 | test cases for abydos.phonetic.phonetic_spanish |
||
5887 | """ |
||
5888 | |||
5889 | def test_phonetic_spanish(self): |
||
5890 | """Test abydos.phonetic.phonetic_spanish.""" |
||
5891 | # Base case |
||
5892 | self.assertEqual(phonetic_spanish(''), '') |
||
5893 | |||
5894 | # Examples given in |
||
5895 | self.assertEqual(phonetic_spanish('Giraldo'), '8953') |
||
5896 | self.assertEqual(phonetic_spanish('Jiraldo'), '8953') |
||
5897 | self.assertEqual(phonetic_spanish('Halla'), '25') |
||
5898 | self.assertEqual(phonetic_spanish('Haya'), '25') |
||
5899 | self.assertEqual(phonetic_spanish('Cielo'), '45') |
||
5900 | self.assertEqual(phonetic_spanish('Sielo'), '45') |
||
5901 | |||
5902 | # Test to maximize coverage |
||
5903 | self.assertEqual(phonetic_spanish('Giraldo', maxlength=2), '89') |
||
5904 | |||
5905 | |||
5906 | class SpanishMetaphoneTestCases(unittest.TestCase): |
||
5907 | """Test Spanish Metaphone functions. |
||
5908 | |||
5909 | test cases for abydos.phonetic.spanish_metaphone |
||
5910 | """ |
||
5911 | |||
5912 | def test_spanish_metaphone(self): |
||
5913 | """Test abydos.phonetic.spanish_metaphone.""" |
||
5914 | # Base case |
||
5915 | self.assertEqual(spanish_metaphone(''), '') |
||
5916 | |||
5917 | # Examples given in |
||
5918 | # https://github.com/amsqr/Spanish-Metaphone/blob/master/phonetic_algorithms_es.py |
||
5919 | self.assertEqual(spanish_metaphone('X'), 'X') |
||
5920 | self.assertEqual(spanish_metaphone('xplosion'), 'EXPLSN') |
||
5921 | self.assertEqual(spanish_metaphone('escalera'), 'ESKLR') |
||
5922 | self.assertEqual(spanish_metaphone('scalera'), 'ESKLR') |
||
5923 | self.assertEqual(spanish_metaphone('mi'), 'M') |
||
5924 | self.assertEqual(spanish_metaphone('tu'), 'T') |
||
5925 | self.assertEqual(spanish_metaphone('su'), 'S') |
||
5926 | self.assertEqual(spanish_metaphone('te'), 'T') |
||
5927 | self.assertEqual(spanish_metaphone('ochooomiiiillllllll'), 'OXMYY') |
||
5928 | self.assertEqual(spanish_metaphone('complicado'), 'KMPLKD') |
||
5929 | self.assertEqual(spanish_metaphone('ácaro'), 'AKR') |
||
5930 | self.assertEqual(spanish_metaphone('ácido'), 'AZD') |
||
5931 | self.assertEqual(spanish_metaphone('clown'), 'KLUN') |
||
5932 | self.assertEqual(spanish_metaphone('down'), 'DUN') |
||
5933 | self.assertEqual(spanish_metaphone('col'), 'KL') |
||
5934 | self.assertEqual(spanish_metaphone('clon'), 'KLN') |
||
5935 | self.assertEqual(spanish_metaphone('waterpolo'), 'UTRPL') |
||
5936 | self.assertEqual(spanish_metaphone('aquino'), 'AKN') |
||
5937 | self.assertEqual(spanish_metaphone('rebosar'), 'RVSR') |
||
5938 | self.assertEqual(spanish_metaphone('rebozar'), 'RVZR') |
||
5939 | self.assertEqual(spanish_metaphone('grajea'), 'GRJ') |
||
5940 | self.assertEqual(spanish_metaphone('gragea'), 'GRJ') |
||
5941 | self.assertEqual(spanish_metaphone('encima'), 'ENZM') |
||
5942 | self.assertEqual(spanish_metaphone('enzima'), 'ENZM') |
||
5943 | self.assertEqual(spanish_metaphone('alhamar'), 'ALAMR') |
||
5944 | self.assertEqual(spanish_metaphone('abollar'), 'AVYR') |
||
5945 | self.assertEqual(spanish_metaphone('aboyar'), 'AVYR') |
||
5946 | self.assertEqual(spanish_metaphone('huevo'), 'UV') |
||
5947 | self.assertEqual(spanish_metaphone('webo'), 'UV') |
||
5948 | self.assertEqual(spanish_metaphone('macho'), 'MX') |
||
5949 | self.assertEqual(spanish_metaphone('xocolate'), 'XKLT') |
||
5950 | self.assertEqual(spanish_metaphone('chocolate'), 'XKLT') |
||
5951 | self.assertEqual(spanish_metaphone('axioma'), 'AXM') |
||
5952 | self.assertEqual(spanish_metaphone('abedul'), 'AVDL') |
||
5953 | self.assertEqual(spanish_metaphone('a'), 'A') |
||
5954 | self.assertEqual(spanish_metaphone('gengibre'), 'JNJVR') |
||
5955 | self.assertEqual(spanish_metaphone('yema'), 'YM') |
||
5956 | self.assertEqual(spanish_metaphone('wHISKY'), 'UISKY') |
||
5957 | self.assertEqual(spanish_metaphone('google'), 'GGL') |
||
5958 | self.assertEqual(spanish_metaphone('xilófono'), 'XLFN') |
||
5959 | self.assertEqual(spanish_metaphone('web'), 'UV') |
||
5960 | self.assertEqual(spanish_metaphone('guerra'), 'GRR') |
||
5961 | self.assertEqual(spanish_metaphone('pingüino'), 'PNUN') |
||
5962 | self.assertEqual(spanish_metaphone('si'), 'S') |
||
5963 | self.assertEqual(spanish_metaphone('ke'), 'K') |
||
5964 | self.assertEqual(spanish_metaphone('que'), 'K') |
||
5965 | self.assertEqual(spanish_metaphone('tu'), 'T') |
||
5966 | self.assertEqual(spanish_metaphone('gato'), 'GT') |
||
5967 | self.assertEqual(spanish_metaphone('gitano'), 'JTN') |
||
5968 | self.assertEqual(spanish_metaphone('queso'), 'KS') |
||
5969 | self.assertEqual(spanish_metaphone('paquete'), 'PKT') |
||
5970 | self.assertEqual(spanish_metaphone('cuco'), 'KK') |
||
5971 | self.assertEqual(spanish_metaphone('perro'), 'PRR') |
||
5972 | self.assertEqual(spanish_metaphone('pero'), 'PR') |
||
5973 | self.assertEqual(spanish_metaphone('arrebato'), 'ARRVT') |
||
5974 | self.assertEqual(spanish_metaphone('hola'), 'OL') |
||
5975 | self.assertEqual(spanish_metaphone('zapato'), 'ZPT') |
||
5976 | self.assertEqual(spanish_metaphone('españa'), 'ESPNY') |
||
5977 | self.assertEqual(spanish_metaphone('garrulo'), 'GRRL') |
||
5978 | self.assertEqual(spanish_metaphone('expansión'), 'EXPNSN') |
||
5979 | self.assertEqual(spanish_metaphone('membrillo'), 'MMVRY') |
||
5980 | self.assertEqual(spanish_metaphone('jamón'), 'JMN') |
||
5981 | self.assertEqual(spanish_metaphone('risa'), 'RS') |
||
5982 | self.assertEqual(spanish_metaphone('caricia'), 'KRZ') |
||
5983 | self.assertEqual(spanish_metaphone('llaves'), 'YVS') |
||
5984 | self.assertEqual(spanish_metaphone('paella'), 'PY') |
||
5985 | self.assertEqual(spanish_metaphone('cerilla'), 'ZRY') |
||
5986 | |||
5987 | # tests from file:///home/chrislit/Downloads/ICTRS_2016_12.pdf |
||
5988 | # including of the modified version of the algorithm |
||
5989 | self.assertEqual(spanish_metaphone('Caricia'), 'KRZ') |
||
5990 | self.assertEqual(spanish_metaphone('Caricia', modified=True), 'KRZ') |
||
5991 | self.assertEqual(spanish_metaphone('Llaves'), 'YVS') |
||
5992 | self.assertEqual(spanish_metaphone('Llaves', modified=True), 'YVZ') |
||
5993 | self.assertEqual(spanish_metaphone('Paella'), 'PY') |
||
5994 | self.assertEqual(spanish_metaphone('Paella', modified=True), 'PY') |
||
5995 | self.assertEqual(spanish_metaphone('Cerilla'), 'ZRY') |
||
5996 | self.assertEqual(spanish_metaphone('Cerilla', modified=True), 'ZRY') |
||
5997 | self.assertEqual(spanish_metaphone('Empeorar'), 'EMPRR') |
||
5998 | self.assertEqual(spanish_metaphone('Empeorar', modified=True), 'ENPRR') |
||
5999 | self.assertEqual(spanish_metaphone('Embotellar'), 'EMVTYR') |
||
6000 | self.assertEqual(spanish_metaphone('Embotellar', modified=True), |
||
6001 | 'ENVTYR') |
||
6002 | self.assertEqual(spanish_metaphone('Hoy'), 'OY') |
||
6003 | self.assertEqual(spanish_metaphone('Hoy', modified=True), 'OY') |
||
6004 | self.assertEqual(spanish_metaphone('Xochimilco'), 'XXMLK') |
||
6005 | self.assertEqual(spanish_metaphone('Xochimilco', modified=True), |
||
6006 | 'XXMLK') |
||
6007 | self.assertEqual(spanish_metaphone('Psiquiatra'), 'PSKTR') |
||
6008 | self.assertEqual(spanish_metaphone('Psiquiatra', modified=True), |
||
6009 | 'ZKTR') |
||
6010 | self.assertEqual(spanish_metaphone('siquiatra'), 'SKTR') |
||
6011 | self.assertEqual(spanish_metaphone('siquiatra', modified=True), 'ZKTR') |
||
6012 | self.assertEqual(spanish_metaphone('Obscuro'), 'OVSKR') |
||
6013 | self.assertEqual(spanish_metaphone('Obscuro', modified=True), 'OZKR') |
||
6014 | self.assertEqual(spanish_metaphone('Oscuro'), 'OSKR') |
||
6015 | self.assertEqual(spanish_metaphone('Oscuro', modified=True), 'OZKR') |
||
6016 | self.assertEqual(spanish_metaphone('Combate'), 'KMVT') |
||
6017 | self.assertEqual(spanish_metaphone('Combate', modified=True), 'KNVT') |
||
6018 | self.assertEqual(spanish_metaphone('Convate'), 'KNVT') |
||
6019 | self.assertEqual(spanish_metaphone('Convate', modified=True), 'KNVT') |
||
6020 | self.assertEqual(spanish_metaphone('Conbate'), 'KNVT') |
||
6021 | self.assertEqual(spanish_metaphone('Conbate', modified=True), 'KNVT') |
||
6022 | self.assertEqual(spanish_metaphone('Comportar'), 'KMPRTR') |
||
6023 | self.assertEqual(spanish_metaphone('Comportar', modified=True), |
||
6024 | 'KNPRTR') |
||
6025 | self.assertEqual(spanish_metaphone('Conportar'), 'KNPRTR') |
||
6026 | self.assertEqual(spanish_metaphone('Conportar', modified=True), |
||
6027 | 'KNPRTR') |
||
6028 | self.assertEqual(spanish_metaphone('Zapato'), 'ZPT') |
||
6029 | self.assertEqual(spanish_metaphone('Zapato', modified=True), 'ZPT') |
||
6030 | self.assertEqual(spanish_metaphone('Sapato'), 'SPT') |
||
6031 | self.assertEqual(spanish_metaphone('Sapato', modified=True), 'ZPT') |
||
6032 | self.assertEqual(spanish_metaphone('Escalera'), 'ESKLR') |
||
6033 | self.assertEqual(spanish_metaphone('Escalera', modified=True), 'EZKLR') |
||
6034 | self.assertEqual(spanish_metaphone('scalera'), 'ESKLR') |
||
6035 | self.assertEqual(spanish_metaphone('scalera', modified=True), 'EZKLR') |
||
6036 | |||
6037 | |||
6038 | class SoundexBRTestCases(unittest.TestCase): |
||
6039 | """Test SoundexBR functions. |
||
6040 | |||
6041 | test cases for abydos.phonetic.soundex_br |
||
6042 | """ |
||
6043 | |||
6044 | def test_spanish_metaphone(self): |
||
6045 | """Test abydos.phonetic.soundex_br.""" |
||
6046 | # Base case |
||
6047 | self.assertEqual(soundex_br(''), '0000') |
||
6048 | |||
6049 | # Examples given at https://github.com/danielmarcelino/SoundexBR |
||
6050 | self.assertEqual(soundex_br('Ana Karolina Kuhnen'), 'A526') |
||
6051 | self.assertEqual(soundex_br('Ana Carolina Kuhnen'), 'A526') |
||
6052 | self.assertEqual(soundex_br('Ana Karolina'), 'A526') |
||
6053 | self.assertEqual(soundex_br('João Souza'), 'J220') |
||
6054 | self.assertEqual(soundex_br('Dilma Vana Rousseff'), 'D451') |
||
6055 | self.assertEqual(soundex_br('Dilma Rousef'), 'D456') |
||
6056 | self.assertEqual(soundex_br('Aécio Neves'), 'A251') |
||
6057 | self.assertEqual(soundex_br('Aecio Neves'), 'A251') |
||
6058 | self.assertEqual(soundex_br('HILBERT'), 'I416') |
||
6059 | self.assertEqual(soundex_br('Heilbronn'), 'E416') |
||
6060 | self.assertEqual(soundex_br('Gauss'), 'G200') |
||
6061 | self.assertEqual(soundex_br('Kant'), 'C530') |
||
6062 | |||
6063 | |||
6064 | class BeiderMorseTestCases(unittest.TestCase): |
||
6065 | """Test BMPM functions. |
||
6066 | |||
6067 | test cases for abydos.phonetic.bmpm and abydos.bm.* |
||
6068 | """ |
||
6069 | |||
6070 | def test_bmpm(self): |
||
6071 | """Test abydos.phonetic.bmpm. |
||
6072 | |||
6073 | Most test cases from: |
||
6074 | http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/ |
||
6075 | |||
6076 | As a rule, the test cases are copied from the above code, but the |
||
6077 | resultant values are not. This is largely because this Python port |
||
6078 | follows the PHP reference implementation much more closely than the |
||
6079 | Java port in Apache Commons Codec does. As a result, these tests have |
||
6080 | been conformed to the output produced by the PHP implementation, |
||
6081 | particularly in terms of formatting and ordering. |
||
6082 | """ |
||
6083 | # base cases |
||
6084 | self.assertEqual(bmpm(''), '') |
||
6085 | |||
6086 | for langs in ('', 1, 'spanish', 'english,italian', 3): |
||
6087 | for name_mode in ('gen', 'ash', 'sep'): |
||
6088 | for match_mode in ('approx', 'exact'): |
||
6089 | for concat in (False, True): |
||
6090 | if (isinstance(langs, text_type) and |
||
6091 | ((name_mode == 'ash' and 'italian' in langs) or |
||
6092 | (name_mode == 'sep' and 'english' in langs))): |
||
6093 | self.assertRaises(ValueError, bmpm, '', langs, |
||
6094 | name_mode, match_mode, concat) |
||
6095 | else: |
||
6096 | self.assertEqual(bmpm('', langs, name_mode, |
||
6097 | match_mode, concat), '') |
||
6098 | |||
6099 | # testSolrGENERIC |
||
6100 | # concat is true, ruleType is EXACT |
||
6101 | self.assertEqual(bmpm('Angelo', '', 'gen', 'exact', True), |
||
6102 | 'angelo anxelo anhelo anjelo anZelo andZelo') |
||
6103 | self.assertEqual(bmpm('D\'Angelo', '', 'gen', 'exact', True), |
||
6104 | 'angelo anxelo anhelo anjelo anZelo andZelo dangelo' + |
||
6105 | ' danxelo danhelo danjelo danZelo dandZelo') |
||
6106 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'gen', |
||
6107 | 'exact', True), |
||
6108 | 'angelo anxelo andZelo') |
||
6109 | self.assertEqual(bmpm('1234', '', 'gen', 'exact', True), '') |
||
6110 | |||
6111 | # concat is false, ruleType is EXACT |
||
6112 | self.assertEqual(bmpm('Angelo', '', 'gen', 'exact', False), |
||
6113 | 'angelo anxelo anhelo anjelo anZelo andZelo') |
||
6114 | self.assertEqual(bmpm('D\'Angelo', '', 'gen', 'exact', False), |
||
6115 | 'angelo anxelo anhelo anjelo anZelo andZelo dangelo' + |
||
6116 | ' danxelo danhelo danjelo danZelo dandZelo') |
||
6117 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'gen', |
||
6118 | 'exact', False), |
||
6119 | 'angelo anxelo andZelo') |
||
6120 | self.assertEqual(bmpm('1234', '', 'gen', 'exact', False), '') |
||
6121 | |||
6122 | # concat is true, ruleType is APPROX |
||
6123 | self.assertEqual(bmpm('Angelo', '', 'gen', 'approx', True), |
||
6124 | 'angilo angYlo agilo ongilo ongYlo ogilo Yngilo' + |
||
6125 | ' YngYlo anxilo onxilo anilo onilo aniilo oniilo' + |
||
6126 | ' anzilo onzilo') |
||
6127 | self.assertEqual(bmpm('D\'Angelo', '', 'gen', 'approx', True), |
||
6128 | 'angilo angYlo agilo ongilo ongYlo ogilo Yngilo' + |
||
6129 | ' YngYlo anxilo onxilo anilo onilo aniilo oniilo' + |
||
6130 | ' anzilo onzilo dangilo dangYlo dagilo dongilo' + |
||
6131 | ' dongYlo dogilo dYngilo dYngYlo danxilo donxilo' + |
||
6132 | ' danilo donilo daniilo doniilo danzilo donzilo') |
||
6133 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'gen', |
||
6134 | 'approx', True), |
||
6135 | 'angilo ongilo anxilo onxilo anzilo onzilo') |
||
6136 | self.assertEqual(bmpm('1234', '', 'gen', 'approx', True), '') |
||
6137 | |||
6138 | # concat is false, ruleType is APPROX |
||
6139 | self.assertEqual(bmpm('Angelo', '', 'gen', 'approx', False), |
||
6140 | 'angilo angYlo agilo ongilo ongYlo ogilo Yngilo' + |
||
6141 | ' YngYlo anxilo onxilo anilo onilo aniilo oniilo' + |
||
6142 | ' anzilo onzilo') |
||
6143 | self.assertEqual(bmpm('D\'Angelo', '', 'gen', 'approx', False), |
||
6144 | 'angilo angYlo agilo ongilo ongYlo ogilo Yngilo' + |
||
6145 | ' YngYlo anxilo onxilo anilo onilo aniilo oniilo' + |
||
6146 | ' anzilo onzilo dangilo dangYlo dagilo dongilo' + |
||
6147 | ' dongYlo dogilo dYngilo dYngYlo danxilo donxilo' + |
||
6148 | ' danilo donilo daniilo doniilo danzilo donzilo') |
||
6149 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'gen', |
||
6150 | 'approx', False), |
||
6151 | 'angilo ongilo anxilo onxilo anzilo onzilo') |
||
6152 | self.assertEqual(bmpm('1234', '', 'gen', 'approx', False), '') |
||
6153 | |||
6154 | # testSolrASHKENAZI |
||
6155 | # concat is true, ruleType is EXACT |
||
6156 | self.assertEqual(bmpm('Angelo', '', 'ash', 'exact', True), |
||
6157 | 'angelo andZelo anhelo anxelo') |
||
6158 | self.assertEqual(bmpm('D\'Angelo', '', 'ash', 'exact', True), |
||
6159 | 'dangelo dandZelo danhelo danxelo') |
||
6160 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6161 | 'ash', 'exact', True) |
||
6162 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'ash', |
||
6163 | 'exact', True, True), 'anxelo angelo') |
||
6164 | self.assertEqual(bmpm('1234', '', 'ash', 'exact', True), '') |
||
6165 | |||
6166 | # concat is false, ruleType is EXACT |
||
6167 | self.assertEqual(bmpm('Angelo', '', 'ash', 'exact', False), |
||
6168 | 'angelo andZelo anhelo anxelo') |
||
6169 | self.assertEqual(bmpm('D\'Angelo', '', 'ash', 'exact', False), |
||
6170 | 'dangelo dandZelo danhelo danxelo') |
||
6171 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6172 | 'ash', 'exact', False) |
||
6173 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'ash', |
||
6174 | 'exact', False, True), 'anxelo angelo') |
||
6175 | self.assertEqual(bmpm('1234', '', 'ash', 'exact', False), '') |
||
6176 | |||
6177 | # concat is true, ruleType is APPROX |
||
6178 | self.assertEqual(bmpm('Angelo', '', 'ash', 'approx', True), |
||
6179 | 'angilo angYlo ongilo ongYlo Yngilo YngYlo anzilo' + |
||
6180 | ' onzilo anilo onilo anxilo onxilo') |
||
6181 | self.assertEqual(bmpm('D\'Angelo', '', 'ash', 'approx', True), |
||
6182 | 'dangilo dangYlo dongilo dongYlo dYngilo dYngYlo' + |
||
6183 | ' danzilo donzilo danilo donilo danxilo donxilo') |
||
6184 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6185 | 'ash', 'approx', True) |
||
6186 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'ash', |
||
6187 | 'approx', True, True), |
||
6188 | 'anxYlo anxilo onxYlo onxilo angYlo angilo ongYlo' + |
||
6189 | ' ongilo') |
||
6190 | self.assertEqual(bmpm('1234', '', 'ash', 'approx', True), '') |
||
6191 | |||
6192 | # concat is false, ruleType is APPROX |
||
6193 | self.assertEqual(bmpm('Angelo', '', 'ash', 'approx', False), |
||
6194 | 'angilo angYlo ongilo ongYlo Yngilo YngYlo anzilo' + |
||
6195 | ' onzilo anilo onilo anxilo onxilo') |
||
6196 | self.assertEqual(bmpm('D\'Angelo', '', 'ash', 'approx', False), |
||
6197 | 'dangilo dangYlo dongilo dongYlo dYngilo dYngYlo' + |
||
6198 | ' danzilo donzilo danilo donilo danxilo donxilo') |
||
6199 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6200 | 'ash', 'approx', False) |
||
6201 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'ash', |
||
6202 | 'approx', False, True), |
||
6203 | 'anxYlo anxilo onxYlo onxilo angYlo angilo ongYlo' + |
||
6204 | ' ongilo') |
||
6205 | self.assertEqual(bmpm('1234', '', 'ash', 'approx', False), '') |
||
6206 | |||
6207 | # testSolrSEPHARDIC |
||
6208 | # concat is true, ruleType is EXACT |
||
6209 | self.assertEqual(bmpm('Angelo', '', 'sep', 'exact', True), |
||
6210 | 'anZelo andZelo anxelo') |
||
6211 | self.assertEqual(bmpm('D\'Angelo', '', 'sep', 'exact', True), |
||
6212 | 'anZelo andZelo anxelo') |
||
6213 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6214 | 'sep', 'exact', True) |
||
6215 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'sep', |
||
6216 | 'exact', True, True), |
||
6217 | 'andZelo anxelo') |
||
6218 | self.assertEqual(bmpm('1234', '', 'sep', 'exact', True), '') |
||
6219 | |||
6220 | # concat is false, ruleType is EXACT |
||
6221 | self.assertEqual(bmpm('Angelo', '', 'sep', 'exact', False), |
||
6222 | 'anZelo andZelo anxelo') |
||
6223 | self.assertEqual(bmpm('D\'Angelo', '', 'sep', 'exact', False), |
||
6224 | 'anZelo andZelo anxelo') |
||
6225 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6226 | 'sep', 'exact', False) |
||
6227 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'sep', |
||
6228 | 'exact', False, True), 'andZelo anxelo') |
||
6229 | self.assertEqual(bmpm('1234', '', 'sep', 'exact', False), '') |
||
6230 | |||
6231 | # concat is true, ruleType is APPROX |
||
6232 | self.assertEqual(bmpm('Angelo', '', 'sep', 'approx', True), |
||
6233 | 'anzila anzilu nzila nzilu anhila anhilu nhila nhilu') |
||
6234 | self.assertEqual(bmpm('D\'Angelo', '', 'sep', 'approx', True), |
||
6235 | 'anzila anzilu nzila nzilu anhila anhilu nhila nhilu') |
||
6236 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6237 | 'sep', 'approx', True) |
||
6238 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'sep', |
||
6239 | 'approx', True, True), |
||
6240 | 'anzila anzilu nzila nzilu anhila anhilu nhila nhilu') |
||
6241 | self.assertEqual(bmpm('1234', '', 'sep', 'approx', True), '') |
||
6242 | |||
6243 | # concat is false, ruleType is APPROX |
||
6244 | self.assertEqual(bmpm('Angelo', '', 'sep', 'approx', False), |
||
6245 | 'anzila anzilu nzila nzilu anhila anhilu nhila nhilu') |
||
6246 | self.assertEqual(bmpm('D\'Angelo', '', 'sep', 'approx', False), |
||
6247 | 'anzila anzilu nzila nzilu anhila anhilu nhila nhilu') |
||
6248 | self.assertRaises(ValueError, bmpm, 'Angelo', 'italian,greek,spanish', |
||
6249 | 'sep', 'approx', False) |
||
6250 | self.assertEqual(bmpm('Angelo', 'italian,greek,spanish', 'sep', |
||
6251 | 'approx', False, True), |
||
6252 | 'anzila anzilu nzila nzilu anhila anhilu nhila nhilu') |
||
6253 | self.assertEqual(bmpm('1234', '', 'sep', 'approx', False), '') |
||
6254 | |||
6255 | # testCompatibilityWithOriginalVersion |
||
6256 | self.assertEqual(bmpm('abram', '', 'gen', 'approx', False), |
||
6257 | 'abram abrom avram avrom obram obrom ovram ovrom' + |
||
6258 | ' Ybram Ybrom abran abron obran obron') |
||
6259 | self.assertEqual(bmpm('Bendzin', '', 'gen', 'approx', False), |
||
6260 | 'binzn bindzn vindzn bintsn vintsn') |
||
6261 | self.assertEqual(bmpm('abram', '', 'ash', 'approx', False), |
||
6262 | 'abram abrom avram avrom obram obrom ovram ovrom' + |
||
6263 | ' Ybram Ybrom ombram ombrom imbram imbrom') |
||
6264 | self.assertEqual(bmpm('Halpern', '', 'ash', 'approx', False), |
||
6265 | 'alpirn alpYrn olpirn olpYrn Ylpirn YlpYrn xalpirn' + |
||
6266 | ' xolpirn') |
||
6267 | |||
6268 | # PhoneticEngineTest |
||
6269 | self.assertEqual(bmpm('Renault', '', 'gen', 'approx', True), |
||
6270 | 'rinolt rino rinDlt rinalt rinult rinD rina rinu') |
||
6271 | self.assertEqual(bmpm('Renault', '', 'ash', 'approx', True), |
||
6272 | 'rinDlt rinalt rinult rYnDlt rYnalt rYnult rinolt') |
||
6273 | self.assertEqual(bmpm('Renault', '', 'sep', 'approx', True), |
||
6274 | 'rinDlt') |
||
6275 | self.assertEqual(bmpm('SntJohn-Smith', '', 'gen', 'exact', True), |
||
6276 | 'sntjonsmit') |
||
6277 | self.assertEqual(bmpm('d\'ortley', '', 'gen', 'exact', True), |
||
6278 | 'ortlaj ortlej dortlaj dortlej') |
||
6279 | self.assertEqual(bmpm('van helsing', '', 'gen', 'exact', False), |
||
6280 | 'helSink helsink helzink xelsink elSink elsink' + |
||
6281 | ' vanhelsink vanhelzink vanjelsink fanhelsink' + |
||
6282 | ' fanhelzink banhelsink') |
||
6283 | |||
6284 | def test_bmpm_misc(self): |
||
6285 | """Test abydos.phonetic.bmpm (miscellaneous tests). |
||
6286 | |||
6287 | The purpose of this test set is to achieve higher code coverage |
||
6288 | and to hit some of the test cases noted in the BMPM reference code. |
||
6289 | """ |
||
6290 | # test of Ashkenazi with discardable prefix |
||
6291 | self.assertEqual(bmpm('bar Hayim', name_mode='ash'), 'Dm xDm') |
||
6292 | |||
6293 | # tests of concat behavior |
||
6294 | self.assertEqual(bmpm('Rodham Clinton', concat=False), |
||
6295 | 'rodam rodom rYdam rYdom rodan rodon rodxam rodxom' + |
||
6296 | ' rodxan rodxon rudam rudom klinton klnton klintun' + |
||
6297 | ' klntun tzlinton tzlnton tzlintun tzlntun zlinton' + |
||
6298 | ' zlnton') |
||
6299 | self.assertEqual(bmpm('Rodham Clinton', concat=True), |
||
6300 | 'rodamklinton rodomklinton rodamklnton rodomklnton' + |
||
6301 | ' rodamklintun rodomklintun rodamklntun rodomklntun' + |
||
6302 | ' rodamtzlinton rodomtzlinton rodamtzlnton' + |
||
6303 | ' rodomtzlnton rodamtzlintun rodomtzlintun' + |
||
6304 | ' rodamtzlntun rodomtzlntun rodamzlinton' + |
||
6305 | ' rodomzlinton rodamzlnton rodomzlnton rodanklinton' + |
||
6306 | ' rodonklinton rodanklnton rodonklnton' + |
||
6307 | ' rodxamklinton rodxomklinton rodxamklnton' + |
||
6308 | ' rodxomklnton rodxanklinton rodxonklinton' + |
||
6309 | ' rodxanklnton rodxonklnton rudamklinton' + |
||
6310 | ' rudomklinton rudamklnton rudomklnton rudamklintun' + |
||
6311 | ' rudomklintun rudamklntun rudomklntun' + |
||
6312 | ' rudamtzlinton rudomtzlinton rudamtzlnton' + |
||
6313 | ' rudomtzlnton rudamtzlintun rudomtzlintun' + |
||
6314 | ' rudamtzlntun rudomtzlntun') |
||
6315 | |||
6316 | # tests of name_mode values |
||
6317 | self.assertEqual(bmpm('bar Hayim', name_mode='ash'), 'Dm xDm') |
||
6318 | self.assertEqual(bmpm('bar Hayim', name_mode='ashkenazi'), 'Dm xDm') |
||
6319 | self.assertEqual(bmpm('bar Hayim', name_mode='Ashkenazi'), 'Dm xDm') |
||
6320 | self.assertEqual(bmpm('bar Hayim', name_mode='gen', concat=True), |
||
6321 | 'barDm borDm bYrDm varDm vorDm barDn borDn barxDm' + |
||
6322 | ' borxDm varxDm vorxDm barxDn borxDn') |
||
6323 | self.assertEqual(bmpm('bar Hayim', name_mode='general', concat=True), |
||
6324 | 'barDm borDm bYrDm varDm vorDm barDn borDn barxDm' + |
||
6325 | ' borxDm varxDm vorxDm barxDn borxDn') |
||
6326 | self.assertEqual(bmpm('bar Hayim', name_mode='Mizrahi', concat=True), |
||
6327 | 'barDm borDm bYrDm varDm vorDm barDn borDn barxDm' + |
||
6328 | ' borxDm varxDm vorxDm barxDn borxDn') |
||
6329 | self.assertEqual(bmpm('bar Hayim', name_mode='mizrahi', concat=True), |
||
6330 | 'barDm borDm bYrDm varDm vorDm barDn borDn barxDm' + |
||
6331 | ' borxDm varxDm vorxDm barxDn borxDn') |
||
6332 | self.assertEqual(bmpm('bar Hayim', name_mode='miz', concat=True), |
||
6333 | 'barDm borDm bYrDm varDm vorDm barDn borDn barxDm' + |
||
6334 | ' borxDm varxDm vorxDm barxDn borxDn') |
||
6335 | |||
6336 | # test that out-of-range langauge_arg results in L_ANY |
||
6337 | self.assertEqual(bmpm('Rodham Clinton', language_arg=2**32), |
||
6338 | 'rodam rodom rYdam rYdom rodan rodon rodxam rodxom' + |
||
6339 | ' rodxan rodxon rudam rudom klinton klnton klintun' + |
||
6340 | ' klntun tzlinton tzlnton tzlintun tzlntun zlinton' + |
||
6341 | ' zlnton') |
||
6342 | self.assertEqual(bmpm('Rodham Clinton', language_arg=-4), |
||
6343 | 'rodam rodom rYdam rYdom rodan rodon rodxam rodxom' + |
||
6344 | ' rodxan rodxon rudam rudom klinton klnton klintun' + |
||
6345 | ' klntun tzlinton tzlnton tzlintun tzlntun zlinton' + |
||
6346 | ' zlnton') |
||
6347 | |||
6348 | # etc. (for code coverage) |
||
6349 | self.assertEqual(bmpm('van Damme', name_mode='sep'), 'dami mi dam m') |
||
6350 | |||
6351 | View Code Duplication | def test_bmpm_nachnamen(self): |
|
|
|||
6352 | """Test abydos.phonetic.bmpm (Nachnamen set).""" |
||
6353 | if not ALLOW_RANDOM: |
||
6354 | return |
||
6355 | with codecs.open(TESTDIR + '/corpora/nachnamen.bm.csv', |
||
6356 | encoding='utf-8') as nachnamen_testset: |
||
6357 | next(nachnamen_testset) |
||
6358 | for nn_line in nachnamen_testset: |
||
6359 | nn_line = nn_line.strip().split(',') |
||
6360 | # This test set is very large (~10000 entries) |
||
6361 | # so let's just randomly select about 20 for testing |
||
6362 | if nn_line[0] != '#' and one_in(500): |
||
6363 | self.assertEqual(bmpm(nn_line[0], language_arg='german'), |
||
6364 | nn_line[1]) |
||
6365 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
||
6366 | |||
6367 | View Code Duplication | def test_bmpm_nachnamen_cc(self): |
|
6368 | """Test abydos.phonetic.bmpm (Nachnamen set, corner cases).""" |
||
6369 | with codecs.open(TESTDIR + '/corpora/nachnamen.bm.cc.csv', |
||
6370 | encoding='utf-8') as nachnamen_testset: |
||
6371 | next(nachnamen_testset) |
||
6372 | for nn_line in nachnamen_testset: |
||
6373 | nn_line = nn_line.strip().split(',') |
||
6374 | # This test set is very large (~10000 entries) |
||
6375 | # so let's just randomly select about 20 for testing |
||
6376 | if nn_line[0] != '#': |
||
6377 | self.assertEqual(bmpm(nn_line[0], language_arg='german'), |
||
6378 | nn_line[1]) |
||
6379 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
||
6380 | |||
6381 | View Code Duplication | def test_bmpm_uscensus2000(self): |
|
6382 | """Test abydos.phonetic.bmpm (US Census 2000 set).""" |
||
6383 | if not ALLOW_RANDOM: |
||
6384 | return |
||
6385 | with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts: |
||
6386 | next(uscensus_ts) |
||
6387 | for cen_line in uscensus_ts: |
||
6388 | cen_line = cen_line.strip().split(',') |
||
6389 | # This test set is very large (~150000 entries) |
||
6390 | # so let's just randomly select about 20 for testing |
||
6391 | if cen_line[0] != '#' and one_in(7500): |
||
6392 | self.assertEqual(bmpm(cen_line[0], match_mode='approx', |
||
6393 | name_mode='gen'), cen_line[1]) |
||
6394 | self.assertEqual(bmpm(cen_line[0], match_mode='approx', |
||
6395 | name_mode='ash'), cen_line[2]) |
||
6396 | self.assertEqual(bmpm(cen_line[0], match_mode='approx', |
||
6397 | name_mode='sep'), cen_line[3]) |
||
6398 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
||
6399 | name_mode='gen'), cen_line[4]) |
||
6400 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
||
6401 | name_mode='ash'), cen_line[5]) |
||
6402 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
||
6403 | name_mode='sep'), cen_line[6]) |
||
6404 | |||
6405 | View Code Duplication | def test_bmpm_uscensus2000_cc(self): |
|
6406 | """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
||
6407 | with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts: |
||
6408 | next(uscensus_ts) |
||
6409 | for cen_line in uscensus_ts: |
||
6410 | cen_line = cen_line.strip().split(',') |
||
6411 | # This test set is very large (~150000 entries) |
||
6412 | # so let's just randomly select about 20 for testing |
||
6413 | if cen_line[0] != '#' and one_in(10): |
||
6414 | self.assertEqual(bmpm(cen_line[0], match_mode='approx', |
||
6415 | name_mode='gen'), cen_line[1]) |
||
6416 | self.assertEqual(bmpm(cen_line[0], match_mode='approx', |
||
6417 | name_mode='ash'), cen_line[2]) |
||
6418 | self.assertEqual(bmpm(cen_line[0], match_mode='approx', |
||
6419 | name_mode='sep'), cen_line[3]) |
||
6420 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
||
6421 | name_mode='gen'), cen_line[4]) |
||
6422 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
||
6423 | name_mode='ash'), cen_line[5]) |
||
6424 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
||
6425 | name_mode='sep'), cen_line[6]) |
||
6426 | |||
6427 | def test_bm_phonetic_number(self): |
||
6428 | """Test abydos.bm._bm_phonetic_number.""" |
||
6429 | self.assertEqual(_bm_phonetic_number(''), '') |
||
6430 | self.assertEqual(_bm_phonetic_number('abcd'), 'abcd') |
||
6431 | self.assertEqual(_bm_phonetic_number('abcd[123]'), 'abcd') |
||
6432 | self.assertEqual(_bm_phonetic_number('abcd[123'), 'abcd') |
||
6433 | self.assertEqual(_bm_phonetic_number('abcd['), 'abcd') |
||
6434 | self.assertEqual(_bm_phonetic_number('abcd[[[123]]]'), 'abcd') |
||
6435 | |||
6436 | def test_bm_apply_rule_if_compat(self): |
||
6437 | """Test abydos.bm._bm_apply_rule_if_compat.""" |
||
6438 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def', 4), 'abcdef') |
||
6439 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def[6]', 4), |
||
6440 | 'abcdef[4]') |
||
6441 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def[4]', 4), |
||
6442 | 'abcdef[4]') |
||
6443 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def[0]', 4), None) |
||
6444 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def[8]', 4), None) |
||
6445 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def', 1), 'abcdef') |
||
6446 | self.assertEqual(_bm_apply_rule_if_compat('abc', 'def[4]', 1), |
||
6447 | 'abcdef[4]') |
||
6448 | |||
6449 | def test_bm_language(self): |
||
6450 | """Test abydos.bm._bm_language. |
||
6451 | |||
6452 | Most test cases from: |
||
6453 | http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/LanguageGuessingTest.java?view=markup |
||
6454 | """ |
||
6455 | self.assertEqual(_bm_language('Renault', 'gen'), L_FRENCH) |
||
6456 | self.assertEqual(_bm_language('Mickiewicz', 'gen'), L_POLISH) |
||
6457 | self.assertEqual(_bm_language('Thompson', 'gen') & L_ENGLISH, |
||
6458 | L_ENGLISH) |
||
6459 | self.assertEqual(_bm_language('Nuñez', 'gen'), L_SPANISH) |
||
6460 | self.assertEqual(_bm_language('Carvalho', 'gen'), L_PORTUGUESE) |
||
6461 | self.assertEqual(_bm_language('Čapek', 'gen'), L_CZECH | L_LATVIAN) |
||
6462 | self.assertEqual(_bm_language('Sjneijder', 'gen'), L_DUTCH) |
||
6463 | self.assertEqual(_bm_language('Klausewitz', 'gen'), L_GERMAN) |
||
6464 | self.assertEqual(_bm_language('Küçük', 'gen'), L_TURKISH) |
||
6465 | self.assertEqual(_bm_language('Giacometti', 'gen'), L_ITALIAN) |
||
6466 | self.assertEqual(_bm_language('Nagy', 'gen'), L_HUNGARIAN) |
||
6467 | self.assertEqual(_bm_language('Ceauşescu', 'gen'), L_ROMANIAN) |
||
6468 | self.assertEqual(_bm_language('Angelopoulos', 'gen'), L_GREEKLATIN) |
||
6469 | self.assertEqual(_bm_language('Αγγελόπουλος', 'gen'), L_GREEK) |
||
6470 | self.assertEqual(_bm_language('Пушкин', 'gen'), L_CYRILLIC) |
||
6471 | self.assertEqual(_bm_language('כהן', 'gen'), L_HEBREW) |
||
6472 | self.assertEqual(_bm_language('ácz', 'gen'), L_ANY) |
||
6473 | self.assertEqual(_bm_language('átz', 'gen'), L_ANY) |
||
6474 | |||
6475 | def test_bm_expand_alternates(self): |
||
6476 | """Test abydos.bm._bm_expand_alternates.""" |
||
6477 | self.assertEqual(_bm_expand_alternates(''), '') |
||
6478 | self.assertEqual(_bm_expand_alternates('aa'), 'aa') |
||
6479 | self.assertEqual(_bm_expand_alternates('aa|bb'), 'aa|bb') |
||
6480 | self.assertEqual(_bm_expand_alternates('aa|aa'), 'aa|aa') |
||
6481 | |||
6482 | self.assertEqual(_bm_expand_alternates('(aa)(bb)'), 'aabb') |
||
6483 | self.assertEqual(_bm_expand_alternates('(aa)(bb[0])'), '') |
||
6484 | self.assertEqual(_bm_expand_alternates('(aa)(bb[4])'), 'aabb[4]') |
||
6485 | self.assertEqual(_bm_expand_alternates('(aa[0])(bb)'), '') |
||
6486 | self.assertEqual(_bm_expand_alternates('(aa[4])(bb)'), 'aabb[4]') |
||
6487 | |||
6488 | self.assertEqual(_bm_expand_alternates('(a|b|c)(a|b|c)'), |
||
6489 | 'aa|ab|ac|ba|bb|bc|ca|cb|cc') |
||
6490 | self.assertEqual(_bm_expand_alternates('(a[1]|b[2])(c|d)'), |
||
6491 | 'ac[1]|ad[1]|bc[2]|bd[2]') |
||
6492 | self.assertEqual(_bm_expand_alternates('(a[1]|b[2])(c[4]|d)'), |
||
6493 | 'ad[1]|bd[2]') |
||
6494 | |||
6495 | def test_bm_remove_dupes(self): |
||
6496 | """Test abydos.bm._bm_remove_dupes.""" |
||
6497 | self.assertEqual(_bm_remove_dupes(''), '') |
||
6498 | self.assertEqual(_bm_remove_dupes('aa'), 'aa') |
||
6499 | self.assertEqual(_bm_remove_dupes('aa|bb'), 'aa|bb') |
||
6500 | self.assertEqual(_bm_remove_dupes('aa|aa'), 'aa') |
||
6501 | self.assertEqual(_bm_remove_dupes('aa|aa|aa|bb|aa'), 'aa|bb') |
||
6502 | self.assertEqual(_bm_remove_dupes('bb|aa|bb|aa|bb'), 'bb|aa') |
||
6503 | |||
6504 | def test_bm_normalize_lang_attrs(self): |
||
6505 | """Test abydos.bm._bm_normalize_language_attributes.""" |
||
6506 | self.assertEqual(_bm_normalize_lang_attrs('', False), '') |
||
6507 | self.assertEqual(_bm_normalize_lang_attrs('', True), '') |
||
6508 | |||
6509 | self.assertRaises(ValueError, _bm_normalize_lang_attrs, 'a[1', False) |
||
6510 | self.assertRaises(ValueError, _bm_normalize_lang_attrs, 'a[1', True) |
||
6511 | |||
6512 | self.assertEqual(_bm_normalize_lang_attrs('abc', False), 'abc') |
||
6513 | self.assertEqual(_bm_normalize_lang_attrs('abc[0]', False), '[0]') |
||
6514 | self.assertEqual(_bm_normalize_lang_attrs('abc[2]', False), 'abc[2]') |
||
6515 | self.assertEqual(_bm_normalize_lang_attrs('abc[2][4]', False), '[0]') |
||
6516 | self.assertEqual(_bm_normalize_lang_attrs('abc[2][6]', False), |
||
6517 | 'abc[2]') |
||
6518 | self.assertEqual(_bm_normalize_lang_attrs('ab[2]c[4]', False), '[0]') |
||
6519 | self.assertEqual(_bm_normalize_lang_attrs('ab[2]c[6]', False), |
||
6520 | 'abc[2]') |
||
6521 | |||
6522 | self.assertEqual(_bm_normalize_lang_attrs('abc', True), 'abc') |
||
6523 | self.assertEqual(_bm_normalize_lang_attrs('abc[0]', True), 'abc') |
||
6524 | self.assertEqual(_bm_normalize_lang_attrs('abc[2]', True), 'abc') |
||
6525 | self.assertEqual(_bm_normalize_lang_attrs('abc[2][4]', True), 'abc') |
||
6526 | self.assertEqual(_bm_normalize_lang_attrs('abc[2][6]', True), 'abc') |
||
6527 | self.assertEqual(_bm_normalize_lang_attrs('ab[2]c[4]', True), 'abc') |
||
6528 | self.assertEqual(_bm_normalize_lang_attrs('ab[2]c[6]', True), 'abc') |
||
6529 | |||
6530 | |||
6531 | if __name__ == '__main__': |
||
6532 | unittest.main() |
||
6533 |