1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
|
3
|
|
|
# Copyright 2014-2018 by Christopher C. Little. |
4
|
|
|
# This file is part of Abydos. |
5
|
|
|
# |
6
|
|
|
# Abydos is free software: you can redistribute it and/or modify |
7
|
|
|
# it under the terms of the GNU General Public License as published by |
8
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
# (at your option) any later version. |
10
|
|
|
# |
11
|
|
|
# Abydos is distributed in the hope that it will be useful, |
12
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14
|
|
|
# GNU General Public License for more details. |
15
|
|
|
# |
16
|
|
|
# You should have received a copy of the GNU General Public License |
17
|
|
|
# along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
18
|
|
|
|
19
|
|
|
"""abydos.tests.test_phonetic_nysiis. |
20
|
|
|
|
21
|
|
|
This module contains unit tests for abydos.phonetic.nysiis |
22
|
|
|
""" |
23
|
|
|
|
24
|
|
|
from __future__ import unicode_literals |
25
|
|
|
|
26
|
|
|
import unittest |
27
|
|
|
|
28
|
|
|
from abydos.phonetic.nysiis import nysiis |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
class NysiisTestCases(unittest.TestCase):
    """Unit tests for the NYSIIS phonetic encoder.

    Covers abydos.phonetic.nysiis.nysiis, called both without the
    ``modified`` flag and with ``modified=True``.
    """

    def test_nysiis(self):
        """Check nysiis() codes when ``modified`` is not passed."""
        expect = self.assertEqual

        def encode20(name):
            """Encode name with max_length=20."""
            return nysiis(name, max_length=20)

        expect(nysiis(''), '')

        # http://coryodaniel.com/index.php/2009/12/30/ruby-nysiis-implementation/
        expect(nysiis('O\'Daniel'), 'ODANAL')
        expect(nysiis('O\'Donnel'), 'ODANAL')
        expect(nysiis('Cory'), 'CARY')
        expect(nysiis('Corey'), 'CARY')
        expect(nysiis('Kory'), 'CARY')

        # http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
        expect(nysiis('Diggell'), 'DAGAL')
        expect(nysiis('Dougal'), 'DAGAL')
        expect(nysiis('Doughill'), 'DAGAL')
        expect(nysiis('Dougill'), 'DAGAL')
        expect(nysiis('Dowgill'), 'DAGAL')
        expect(nysiis('Dugall'), 'DAGAL')
        expect(nysiis('Dugall'), 'DAGAL')
        expect(nysiis('Glinde'), 'GLAND')
        expect(encode20('Plumridge'), 'PLANRADG')
        expect(nysiis('Chinnick'), 'CANAC')
        expect(nysiis('Chinnock'), 'CANAC')
        expect(nysiis('Chinnock'), 'CANAC')
        expect(nysiis('Chomicki'), 'CANAC')
        expect(nysiis('Chomicz'), 'CANAC')
        expect(nysiis('Schimek'), 'SANAC')
        expect(nysiis('Shimuk'), 'SANAC')
        expect(nysiis('Simak'), 'SANAC')
        expect(nysiis('Simek'), 'SANAC')
        expect(nysiis('Simic'), 'SANAC')
        expect(nysiis('Sinnock'), 'SANAC')
        expect(nysiis('Sinnocke'), 'SANAC')
        expect(nysiis('Sunnex'), 'SANAX')
        expect(nysiis('Sunnucks'), 'SANAC')
        expect(nysiis('Sunock'), 'SANAC')
        expect(encode20('Webberley'), 'WABARLY')
        expect(encode20('Wibberley'), 'WABARLY')

        # etc. (for code coverage)
        expect(nysiis('Alpharades'), 'ALFARA')
        expect(nysiis('Aschenputtel'), 'ASANPA')
        expect(nysiis('Beverly'), 'BAFARL')
        expect(nysiis('Hardt'), 'HARD')
        expect(nysiis('acknowledge'), 'ACNALA')
        expect(nysiis('MacNeill'), 'MCNAL')
        expect(nysiis('MacNeill'), nysiis('McNeill'))
        expect(nysiis('Knight'), 'NAGT')
        expect(nysiis('Knight'), nysiis('Night'))
        expect(nysiis('Pfarr'), 'FAR')
        expect(nysiis('Phair'), 'FAR')
        expect(nysiis('Phair'), nysiis('Pfarr'))
        expect(nysiis('Cherokee'), 'CARACY')
        expect(nysiis('Iraq'), 'IRAG')

        # max_length bounds tests
        expect(nysiis('Niall', max_length=-1), 'NAL')
        expect(nysiis('Niall', max_length=0), 'NAL')

    def test_modified_nysiis(self):
        """Check nysiis() codes when called with ``modified=True``."""
        expect = self.assertEqual

        def modified(name, **options):
            """Encode name with modified=True plus any extra options."""
            return nysiis(name, modified=True, **options)

        def modified8(name):
            """Encode name with max_length=8 and modified=True."""
            return nysiis(name, max_length=8, modified=True)

        expect(modified('', max_length=-1), '')

        # https://naldc.nal.usda.gov/download/27833/PDF
        # Some of these were... wrong... and have been corrected
        expect(modified8('Daves'), 'DAV')
        expect(modified8('Davies'), 'DAVY')
        expect(modified8('Devies'), 'DAFY')
        expect(modified8('Divish'), 'DAVAS')
        expect(modified8('Dove'), 'DAV')
        expect(modified8('Devese'), 'DAFAS')
        expect(modified8('Devies'), 'DAFY')
        expect(modified8('Devos'), 'DAF')

        expect(modified8('Schmit'), 'SNAT')
        expect(modified8('Schmitt'), 'SNAT')
        expect(modified8('Schmitz'), 'SNAT')
        expect(modified8('Schmoutz'), 'SNAT')
        expect(modified8('Schnitt'), 'SNAT')
        expect(modified8('Smit'), 'SNAT')
        expect(modified8('Smite'), 'SNAT')
        expect(modified8('Smits'), 'SNAT')
        expect(modified8('Smoot'), 'SNAT')
        expect(modified8('Smuts'), 'SNAT')
        expect(modified8('Sneath'), 'SNAT')
        expect(modified8('Smyth'), 'SNAT')
        expect(modified8('Smithy'), 'SNATY')
        expect(modified8('Smithey'), 'SNATY')

        # http://www.dropby.com/NYSIISTextStrings.html
        # Some of these have been altered since the above uses a different set
        # of modifications.
        expect(modified8('Edwards'), 'EDWAD')
        expect(modified8('Perez'), 'PAR')
        expect(modified8('Macintosh'), 'MCANTAS')
        expect(modified8('Phillipson'), 'FALAPSAN')
        expect(modified8('Haddix'), 'HADAC')
        expect(modified8('Essex'), 'ESAC')
        expect(modified8('Moye'), 'MY')
        expect(modified8('McKee'), 'MCY')
        expect(modified8('Mackie'), 'MCY')
        expect(modified8('Heitschmidt'), 'HATSNAD')
        expect(modified8('Bart'), 'BAD')
        expect(modified8('Hurd'), 'HAD')
        expect(modified8('Hunt'), 'HAN')
        expect(modified8('Westerlund'), 'WASTARLA')
        expect(modified8('Evers'), 'EVAR')
        expect(modified8('Devito'), 'DAFAT')
        expect(modified8('Rawson'), 'RASAN')
        expect(modified8('Shoulders'), 'SALDAR')
        expect(modified8('Leighton'), 'LATAN')
        expect(modified8('Wooldridge'), 'WALDRAG')
        expect(modified8('Oliphant'), 'OLAFAN')
        expect(modified8('Hatchett'), 'HATCAT')
        expect(modified8('McKnight'), 'MCNAT')
        expect(modified8('Rickert'), 'RACAD')
        expect(modified8('Bowman'), 'BANAN')
        expect(modified8('Vasquez'), 'VASG')
        expect(modified8('Bashaw'), 'BAS')
        expect(modified8('Schoenhoeft'), 'SANAFT')
        expect(modified8('Heywood'), 'HAD')
        expect(modified8('Hayman'), 'HANAN')
        expect(modified8('Seawright'), 'SARAT')
        expect(modified8('Kratzer'), 'CRATSAR')
        expect(modified8('Canaday'), 'CANADY')
        expect(modified8('Crepeau'), 'CRAP')

        # Additional tests from @Yomguithereal's talisman
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/nysiis.js
        expect(modified8('Andrew'), 'ANDR')
        expect(modified8('Robertson'), 'RABARTSA')
        expect(modified8('Nolan'), 'NALAN')
        expect(modified8('Louis XVI'), 'LASXV')
        expect(modified8('Case'), 'CAS')
        expect(modified8('Mclaughlin'), 'MCLAGLAN')
        expect(modified8('Awale'), 'AL')
        expect(modified8('Aegir'), 'AGAR')
        expect(modified8('Lundgren'), 'LANGRAN')
        expect(modified8('Philbert'), 'FALBAD')
        expect(modified8('Harry'), 'HARY')
        expect(modified8('Mackenzie'), 'MCANSY')

        # max_length bounds tests
        expect(modified('Niall', max_length=-1), 'NAL')
        expect(modified('Niall', max_length=0), 'NAL')

        # coverage
        expect(modified('Sam Jr.'), 'ERROR')
        expect(modified('John Sr.'), 'ERROR')
        expect(modified('Wright'), 'RAT')
        expect(modified('Rhodes'), 'RAD')
        expect(modified('Dgagoda'), 'GAGAD')
        expect(modified('Bosch'), 'BAS')
        expect(modified('Schrader'), 'SRADAR')
229
|
|
|
|
230
|
|
|
|
231
|
|
|
# Run every test case in this module when the file is executed directly as
# a script; nothing is run when the module is merely imported.
if __name__ == '__main__':
    unittest.main()
233
|
|
|
|