Completed
Branch master (87ccc1)
by Chris
10:18
created

tests.phonetic.test_phonetic_dolby.DolbyTestCases.test_dolby()   B

Complexity

Conditions 4

Size

Total Lines 570
Code Lines 527

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 527
nop 1
dl 0
loc 570
rs 7
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, particularly when each one has a good name. Besides, if your method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign that you should extract the commented part into a new method, using the comment as a starting point when coming up with a good name for that new method.

Commonly applied refactorings include:

- Extract Method

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_phonetic_dolby.
20
21
This module contains unit tests for abydos.phonetic.dolby
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
from abydos.phonetic.dolby import dolby
29
30
31
class DolbyTestCases(unittest.TestCase):
32
    """Test Dolby functions.
33
34
    test cases for abydos.phonetic.dolby
35
    """
36
37
    def test_dolby(self):
38
        """Test abydos.phonetic.dolby."""
39
        # Base case
40
        self.assertEqual(dolby(''), '')
41
42
        # Tests from Dolby (1970) pp. 264--274
43
        # https://ejournals.bc.edu/ojs/index.php/ital/article/download/5259/4734
44
        # Checked against Cunningham, et al. (1969) pp. 127--136, as needed.
45
        # https://files.eric.ed.gov/fulltext/ED029679.pdf
46
        test_cases = (('*BL', 'Abel', 'Abele', 'Abell', 'Able'),
47
                      ('*BRMS', 'Abrahams', 'Abrams'),
48
                      ('*BRMSN', 'Abrahamson', 'Abramson'),
49
                      ('*D', 'Eddy', 'Eddie'),
50
                      ('*DMNS', 'Edmonds', 'Edmunds'),
51
                      ('*DMNSN', 'Edmondson', 'Edmundson'),
52
                      ('*DMS', 'Adams', 'Addems'),
53
                      ('*GN', 'Eagen', 'Egan', 'Eggen'),
54
                      ('*GR', '!Jaeger', 'Yaeger', 'Yeager'),
55
                      ('*KN', 'Aiken', 'Aikin', 'Aitken'),
56
                      ('*KNS', 'Adkins', 'Akins'),
57
                      ('*KR', 'Acker', 'Aker'),
58
                      ('*KR', 'Eckard', 'Eckardt', 'Eckart', 'Eckert',
59
                       'Eckhardt'),
60
                      ('*KS', 'Oakes', 'Oaks', 'Ochs'),
61
                      ('*LBRD', 'Albright', 'Allbright'),
62
                      ('*LD', 'Elliot', 'Elliott'),
63
                      ('*LN', 'Allan', 'Allen', 'Allyn'),
64
                      ('*LSN', 'Ohlsen', 'Olesen', 'Olsen', 'Olson', 'Olsson'),
65
                      ('*LVR', 'Oliveira', 'Olivera', 'Olivero'),
66
                      ('*MS', 'Ames', 'Eames'),
67
                      ('*NGL', 'Engel', 'Engle', 'Ingle'),
68
                      ('*NL', 'O\'Neal', 'O\'Neil', 'O\'Neill'),
69
                      ('*NRS', 'Andrews', 'Andrus'),
70
                      ('*NRSN', 'Andersen', 'Anderson', 'Andreasen'),
71
                      ('*NS', 'Ennis', 'Enos'),
72
                      # Corrected: 'Enrichsen' below was an error
73
                      # (It's correct in Cunningham, et al. 1969.)
74
                      ('*RKSN', 'Erichsen', 'Erickson', 'Ericson', 'Ericsson',
75
                       'Eriksen'),
76
                      ('*RL', 'Earley', 'Early'),
77
                      ('*RN', 'Erwin', 'Irwin'),
78
                      ('*RNS', 'Aarons', 'Ahrends', 'Ahrens', 'Arens',
79
                       'Arentz', 'Arons'),
80
                      ('*RS', 'Ayers', 'Ayres'),
81
                      ('*RVN', 'Ervin', 'Ervine', 'Irvin', 'Irvine'),
82
                      ('*RVNG', 'Erving', 'Irving'),
83
                      ('*SBRN', 'Osborn', 'Osborne', 'Osbourne', 'Osburn'),
84
                      ('B*D', 'Beatie', 'Beattie', 'Beatty', 'Beaty',
85
                       'Beedie'),
86
                      ('B*DS', 'Betts', 'Betz'),
87
                      ('B*KMN', 'Bachman', 'Bachmann', 'Backman'),
88
                      ('B*L', 'Bailey', 'Baillie', 'Bailly', 'Baily',
89
                       'Bayley'),
90
                      ('B*L', 'Beal', 'Beale', 'Beall', 'Biehl'),
91
                      ('B*L', 'Belew', 'Ballou', 'Bellew'),
92
                      ('B*L', 'Buhl', 'Buell'),
93
                      ('B*L', 'Belle', 'Bell'),
94
                      # Corrected: No reason for D to disappear
95
                      ('B*LDN', 'Bolton', 'Boulton'),
96
                      ('B*M', 'Baum', 'Bohm', 'Bohme'),
97
                      ('B*MN', 'Bauman', 'Bowman'),
98
                      ('B*N', 'Bain', 'Bane', 'Bayne'),
99
                      ('B*ND', 'Bennet', 'Bennett'),
100
                      ('B*R', 'Baer', 'Bahr', 'Baier', 'Bair', 'Bare', 'Bear',
101
                       'Beare', 'Behr', 'Beier', 'Bier', '!Bryer'),
102
                      ('B*R', 'Barry', 'Beare', 'Beery', 'Berry'),
103
                      ('B*R', 'Bauer', 'Baur', 'Bower'),
104
                      ('B*R', 'Bird', 'Burd', 'Byrd'),
105
                      ('B*RBR', 'Barbour', 'Barber'),
106
                      ('B*RG', 'Berg', 'Bergh', 'Burge'),
107
                      ('B*RGR', 'Berger', 'Burger'),
108
                      ('B*RK', 'Boerke', 'Birk', 'Bourke', 'Burk', 'Burke'),
109
                      ('B*RN', 'Burn', 'Byrne'),
110
                      ('B*RNR', 'Bernard', 'Bernhard', 'Bernhardt',
111
                       'Bernhart'),
112
                      ('B*RNS', 'Berns', 'Birns', 'Burns', 'Byrns', 'Byrnes'),
113
                      ('B*RNSN', 'Bernstein', 'Bornstein'),
114
                      # Corrected: 'RCH' -> 'RH' in rule 2
115
                      ('B*RS', 'Bertsch', '!Birch', '!Burch'),
116
                      ('BL*KBRN', 'Blackburn', '!Blagburn'),
117
                      ('BL*M', 'Blom', 'Bloom', 'Bluhm', 'Blum', 'Blume'),
118
                      ('BR*D', 'Brode', 'Brodie', 'Brody'),
119
                      ('BR*N', 'Braun', 'Brown', 'Browne'),
120
                      ('BR*N', 'Brand', 'Brandt', 'Brant'),
121
                      # Corrected: 'Diezt' -> 'D*SD', so reversed zt -> tz
122
                      # (Correct in Cunningham, et al. 1969.)
123
                      ('D*DS', 'Dietz', 'Ditz'),
124
                      ('D*F', 'Duffie', 'Duffy'),
125
                      ('D*GN', 'Dougan', 'Dugan', 'Duggan'),
126
                      ('D*K', 'Dickey', 'Dicke'),
127
                      ('D*KNSN', 'Dickenson', '!Dickerson', 'Dickinson',
128
                       '!Dickison'),
129
                      ('D*KSN', 'Dickson', 'Dixon', 'Dixson'),
130
                      ('D*L', 'Dailey', 'Daily', 'Daley', 'Daly'),
131
                      ('D*L', 'Dahl', 'Dahle', 'Dall', 'Doll'),
132
                      ('D*L', 'Deahl', 'Deal', 'Diehl'),
133
                      ('D*MN', 'Diamond', 'Dimond', 'Dymond'),
134
                      ('D*N', 'Dean', 'Deane', 'Deen'),
135
                      ('D*N', 'Denney', 'Denny'),
136
                      ('D*N', 'Donahoo', 'Donahue', 'Donoho', 'Donohoe',
137
                       'Donohoo,'),
138
                      ('D*N', 'Donohue', 'Dunnahoo'),
139
                      ('D*N', 'Downey', 'Downie'),
140
                      ('D*N', 'Dunn', 'Dunne'),
141
                      ('D*NL', 'Donley', 'Donnelley', 'Donnelly'),
142
                      ('D*R', 'Daugherty', 'Doherty', 'Dougherty'),
143
                      ('D*R', 'Dyar', 'Dyer'),
144
                      ('D*RM', 'Derham', 'Durham'),
145
                      ('D*VDSN', 'Davidsen', 'Davidson', '!Davison'),
146
                      ('D*VS', 'Davies', 'Davis'),
147
                      ('DR*SL', 'Driscoll', 'Driskell'),
148
                      ('F*', 'Fay', 'Fahay', 'Fahey'),
149
                      ('F*FR', 'Fifer', 'Pfeffer', 'Pfeiffer'),
150
                      ('F*GN', 'Fagan', 'Feigan', 'Fegan'),
151
                      ('F*L', 'Feil', 'Pfeil'),
152
                      # Corrected: T -> D after LD -> D
153
                      ('F*L', 'Feld', 'Feldt', '!Felt'),
154
                      ('F*LKNR', 'Faulkner', 'Falconer'),
155
                      ('F*LPS', 'Philips', 'Phillips'),
156
                      ('F*NGN', 'Finnegan', 'Finnigan'),
157
                      ('F*NL', 'Finlay', 'Finley'),
158
                      ('F*RL', 'Farrell', 'Ferrell'),
159
                      ('F*RR', 'Ferrara', 'Ferreira', 'Ferriera'),
160
                      # Corrected: No reason for S to be eliminated
161
                      ('F*RSR', 'Foerster', 'Forester', 'Forrester',
162
                       'Forster'),
163
                      ('F*RS', 'Forrest', 'Forest'),
164
                      ('F*RS', 'Faris', 'Farriss', 'Ferris', 'Ferriss'),
165
                      ('F*RS', 'First', 'Fuerst', 'Furst'),
166
                      ('F*SR', 'Fischer', 'Fisher'),
167
                      ('FL*N', 'Flinn', 'Flynn'),
168
                      ('FL*NGN', 'Flanagan', 'Flanigan', 'Flannigan'),
169
                      ('FR*', 'Frei', 'Frey', 'Fry', 'Frye'),
170
                      ('FR*DMN', 'Freedman', 'Friedman'),
171
                      # Corrected: Fredickson -> Fredrickson
172
                      # (Correct in Cunningham, et al. 1969.)
173
                      ('FR*DRKSN', 'Frederickson', 'Frederiksen',
174
                       'Fredrickson', 'Fredriksson'),
175
                      # Corrected: NK would not reduce to K because of rule 4
176
                      ('FR*NK', 'Franck', 'Frank'),
177
                      ('FR*NS', 'France', 'Frantz', 'Franz'),
178
                      # Corrected: vowel deletion happens after double
179
                      # consonant deletion
180
                      ('FR*NSS', 'Frances', 'Francis'),
181
                      ('FR*S', 'Freeze', 'Freese', 'Fries'),
182
                      ('FR*SR', 'Fraser', 'Frasier', 'Frazer', 'Frazier'),
183
                      ('G*D', 'Good', 'Goode'),
184
                      ('G*DS', 'Getz', 'Goetz', 'Goetze'),
185
                      ('G*F', 'Goff', 'Gough'),
186
                      ('G*L', 'Gold', 'Goold', 'Gould'),
187
                      ('G*LMR', 'Gilmer', 'Gilmore', 'Gilmour'),
188
                      ('G*LR', 'Gallagher', 'Gallaher', 'Galleher'),
189
                      ('G*MS', 'Gomes', 'Gomez'),
190
                      ('G*NR', 'Guenther', 'Gunther'),
191
                      ('G*NSLS', 'Gonzales', 'Gonzalez'),
192
                      # Corrected: 'Consalves' for the first case likely a typo
193
                      # (Correct in Cunningham, et al. 1969.)
194
                      ('G*NSLVS', 'Gonsalves', 'Gonzalves'),
195
                      ('G*RD', 'Garratt', 'Garrett'),
196
                      ('G*RD', 'Garrity', 'Geraghty', 'Geraty', 'Gerrity'),
197
                      ('G*RN', 'Gorden', 'Gordohn', 'Gordon'),
198
                      ('G*RNR', 'Gardiner', 'Gardner', 'Gartner'),
199
                      ('G*RR', 'Garrard', 'Gerard', 'Gerrard', 'Girard'),
200
                      ('G*S', 'Gauss', 'Goss'),
201
                      ('GR*', 'Gray', 'Grey'),
202
                      ('GR*FD', 'Griffeth', 'Griffith'),
203
                      ('GR*N', 'Green', 'Greene'),
204
                      ('GR*S', 'Gros', 'Grose', 'Gross'),
205
                      ('H*D', 'Hyde', 'Heidt'),
206
                      ('H*F', 'Hoff', 'Hough', 'Huff'),
207
                      ('H*FMN', 'Hoffman', 'Hoffmann', 'Hofman', 'Hofmann',
208
                       'Huffman'),
209
                      ('H*G', 'Hoag', 'Hoge', 'Hogue'),
210
                      ('H*GN', 'Hagan', 'Hagen'),
211
                      ('H*K', 'Hauch', 'Hauck', 'Hauk', 'Hauke'),
212
                      ('H*KSN', 'Hutcheson', 'Hutchison'),
213
                      ('H*L', 'Holley', 'Holly'),
214
                      ('H*L', 'Holl', 'Hall'),
215
                      ('H*L', 'Halley', 'Haley'),
216
                      ('H*L', 'Haile', 'Hale'),
217
                      ('H*LD', 'Holiday', 'Halliday', 'Holladay', 'Holliday'),
218
                      ('H*LG', 'Helwig', 'Hellwig'),
219
                      ('H*LM', 'Holm', '!Home'),
220
                      ('H*LMS', 'Holmes', '!Homes'),
221
                      ('H*LN', 'Highland', 'Hyland'),
222
                      ('H*M', 'Ham', 'Hamm'),
223
                      ('H*MR', 'Hammar', 'Hammer'),
224
                      ('H*N', 'Hanna', 'Hannah'),
225
                      ('H*N', 'Hahn', 'Hahne', 'Hann', 'Haun'),
226
                      ('H*NN', 'Hanan', 'Hannan', 'Hannon'),
227
                      ('H*NRKS', 'Hendricks', 'Hendrix', 'Henriques'),
228
                      ('H*NRKSN', 'Hendrickson', 'Henriksen', 'Henrikson'),
229
                      ('H*NS', 'Heintz', 'Heinz', 'Heinze', 'Hindes', 'Hinds',
230
                       'Hines', 'Hinze'),
231
                      ('H*NS', 'Haines', 'Haynes'),
232
                      ('H*NSN', 'Henson', 'Hansen', 'Hanson', 'Hanssen',
233
                       'Hansson', 'Hanszen'),
234
                      ('H*R', 'Herd', 'Heard', 'Hird', 'Hurd'),
235
                      ('H*R', 'Hart', 'Hardt', 'Harte', 'Heart'),
236
                      ('H*R', 'Hare', 'Hair'),
237
                      ('H*R', 'Hardey', 'Hardie', 'Hardy'),
238
                      ('H*RMN', 'Hartman', 'Hardmen', 'Hardman', 'Hartmann'),
239
                      ('H*RMN', 'Herman', 'Hermann', 'Herrmann'),
240
                      ('H*RMN', 'Harman', 'Harmon'),
241
                      ('H*RN', 'Heron', 'Herrin', 'Herron'),
242
                      ('H*RN', 'Hardin', 'Harden'),
243
                      ('H*RN', 'Horn', 'Horne'),
244
                      ('H*RNGDN', 'Herrington', 'Harrington'),
245
                      ('H*S', 'Haas', 'Haase', 'Hasse'),
246
                      ('H*S', 'Howes', 'House', 'Howse'),
247
                      ('H*S', 'Hays', 'Hayes'),
248
                      ('H*SN', 'Houston', 'Huston'),
249
                      ('H*VR', 'Hoover', 'Hover'),
250
                      ('J*', 'Jew', 'Jue'),
251
                      ('J*FR', 'Jeffery', 'Jeffrey'),
252
                      ('J*FRS', 'Jefferies', 'Jefferis', 'Jefferys',
253
                       'Jeffreys'),
254
                      ('J*KB', 'Jacobi', 'Jacoby'),
255
                      ('J*KBSN', 'Jacobsen', 'Jacobson', 'Jackobsen'),
256
                      ('J*KS', 'Jacques', 'Jacks', 'Jaques'),
257
                      ('J*L', 'Jewell', 'Juhl'),
258
                      ('J*MS', 'Jaimes', 'James'),
259
                      ('J*MSN', 'Jameson', 'Jamieson', 'Jamison'),
260
                      ('J*NSN', 'Jahnsen', 'Jansen', 'Jansohn', 'Janssen',
261
                       'Jansson,', 'Janzen', 'Jensen', 'Jenson'),
262
                      ('J*S', 'Joice', 'Joyce'),
263
                      ('K*', 'Kay', 'Kaye'),
264
                      ('K*F', 'Coffee', 'Coffey'),
265
                      ('K*FMN', 'Coffman', 'Kauffman', 'Kaufman', 'Kaufmann'),
266
                      ('K*K', 'Cook', 'Cooke', 'Koch', 'Koche'),
267
                      ('K*K', 'Cook', 'Cooke', 'Koch', 'Koche'),
268
                      ('K*L', 'Cole', 'Kohl', 'Koll'),
269
                      ('K*L', 'Cole', 'Kohl', 'Koll'),
270
                      ('K*L', 'Kelley', 'Kelly'),
271
                      ('K*LMN', 'Coleman', 'Colman'),
272
                      ('K*LR', 'Koehler', 'Koeller', 'Kohler', 'Koller'),
273
                      ('K*MBRLN', 'Chamberlain', 'Chamberlin'),
274
                      ('K*MBS', 'Combs', 'Coombes', 'Coombs'),
275
                      ('K*MP', 'Camp', 'Kampe', 'Kampf'),
276
                      ('K*MPS', 'Campos', 'Campus'),
277
                      ('K*N', 'Cahn', 'Conn', 'Kahn'),
278
                      ('K*N', 'Cahen', 'Cain', 'Caine', 'Cane', 'Kain',
279
                       'Kane'),
280
                      ('K*N', 'Chin', 'Chinn'),
281
                      ('K*N', 'Chaney', 'Cheney'),
282
                      ('K*N', 'Coen', 'Cohan', 'Cohen', 'Cohn', 'Cone',
283
                       'Koehn', 'Kahn'),
284
                      ('K*N', 'Coon', 'Kuhn', 'Kuhne'),
285
                      ('K*N', 'Kenney', 'Kenny', 'Kinney'),
286
                      ('K*NL', 'Conley', 'Conly', 'Connelly', 'Connolly'),
287
                      ('K*NR', 'Conner', 'Connor'),
288
                      ('K*NS', 'Coons', 'Koontz', 'Kuhns', 'Kuns', 'Kuntz',
289
                       'Kunz'),
290
                      ('K*P', 'Coop', 'Co-op', 'Coope', 'Coupe', 'Koop'),
291
                      ('K*PL', 'Chapel', 'Chapell', 'Chappel', 'Chappell',
292
                       'Chappelle', 'Chapple'),
293
                      ('K*R', 'Carrie', 'Carey', 'Cary'),
294
                      ('K*R', 'Corey', 'Cory'),
295
                      ('K*R', 'Carr', 'Kar', 'Karr'),
296
                      # Corrected: No reason to strip S
297
                      ('K*RS', 'Kurtz', 'Kurz'),
298
                      ('K*R', 'Kehr', 'Ker', 'Kerr'),
299
                      ('K*RD', 'Cartwright', 'Cortright'),
300
                      # Corrected: No reason to strip D
301
                      ('K*RLDN', 'Carleton', 'Carlton'),
302
                      # Corrected: CE -> SE
303
                      ('K*RN', 'Carney', '!Cerney', 'Kearney'),
304
                      # Corrected: RC -> R
305
                      ('K*RSNR', 'Kirschner', '!Kirchner'),
306
                      ('K*S', 'Chace', 'Chase'),
307
                      ('K*S', 'Cass', 'Kass'),
308
                      ('K*S', 'Kees', 'Keyes', 'Keys'),
309
                      ('K*SL', 'Cassel', 'Cassell', 'Castle'),
310
                      ('K*SLR', 'Kesler', 'Kessler', 'Kestler'),
311
                      ('K*SR', 'Kaiser', 'Kayser', 'Keizer', 'Keyser',
312
                       'Kieser', 'Kiser', 'Kizer'),
313
                      ('KL*N', 'Cline', 'Klein', 'Kleine', 'Kline'),
314
                      ('KL*RK', 'Clark', 'Clarke'),
315
                      ('KL*SN', 'Claussen', 'Clausen', 'Clawson', 'Closson'),
316
                      ('KR*', 'Crow', 'Crowe'),
317
                      ('KR*GR', 'Krieger', 'Kroeger', 'Krueger', 'Kruger'),
318
                      ('KR*MR', 'Creamer', 'Cramer', 'Kraemer', 'Kramer',
319
                       'Kremer'),
320
                      ('KR*N', 'Craine', 'Crane'),
321
                      ('KR*S', 'Christie', 'Christy', 'Kristee'),
322
                      ('KR*S', 'Crouss', 'Kraus', 'Krausch', 'Krause',
323
                       'Krouse'),
324
                      ('KR*S', 'Cross', 'Krost'),
325
                      ('KR*S', 'Crews', 'Cruz', 'Kruse'),
326
                      ('KR*SNSN', 'Christensen', 'Christiansen',
327
                       'Christianson'),
328
                      ('L*', 'Loe', 'Loewe', 'Low', 'Lowe'),
329
                      ('L*', 'Lea', 'Lee', '!Leigh'),
330
                      ('L*D', 'Lloyd', 'Loyd'),
331
                      ('L*DL', 'Litle', 'Littell', 'Little', 'Lytle'),
332
                      ('L*DRMN', 'Ledterman', 'Letterman'),
333
                      ('L*K', 'Leach', 'Leech', 'Leitch'),
334
                      ('L*KS', 'Lucas', 'Lukas'),
335
                      ('L*LN', 'Laughlin', 'Loughlin'),
336
                      ('L*LR', 'Lawler', 'Lawlor'),
337
                      ('L*MB', 'Lamb', '!Lamm'),
338
                      ('L*MN', 'Lemen', 'Lemmon', 'Lemon'),
339
                      ('L*MN', 'Layman', 'Lehman', 'Lehmann'),
340
                      ('L*N', 'Lind', 'Lynd', 'Lynde'),
341
                      ('L*N', 'Lion', 'Lyon'),
342
                      ('L*N', 'Lin', 'Linn', 'Lynn', 'Lynne'),
343
                      # Corrected: NG -> NG (!N)
344
                      ('L*N', 'Lain', 'Laine', '!Laing', 'Lane', 'Layne'),
345
                      ('L*NG', 'Lang', 'Lange'),
346
                      ('L*NN', 'London', 'Lundin'),
347
                      ('L*NS', 'Lindsay', 'Lindsey', '!Lindsley', '!Linsley'),
348
                      ('L*R', 'Lawry', 'Lowery', 'Lowrey', 'Lowry'),
349
                      ('L*RNS', 'Lawrence', 'Lowrance'),
350
                      ('L*RNS', 'Laurence', 'Lawrance', 'Lawrence', 'Lorence',
351
                       'Lorenz'),
352
                      ('L*RSN', 'Larsen', 'Larson'),
353
                      ('L*S', 'Lewis', 'Louis', 'Luis', 'Luiz'),
354
                      ('L*S', 'Lacey', 'Lacy'),
355
                      ('L*SR', '!Leicester', 'Lester'),
356
                      ('L*V', 'Levey', 'Levi', 'Levy'),
357
                      ('L*VD', 'Leavett', 'Leavitt', 'Levit'),
358
                      ('L*VL', 'Lavell', 'Lavelle', 'Leavelle', 'Loveall',
359
                       'Lovell'),
360
                      ('L*VN', 'Lavin', 'Levin', 'Levine'),
361
                      ('M*D', 'Mead', 'Meade'),
362
                      # Corrected: RT*N -> R*N -> RN
363
                      ('M*RN', '!Moretton', 'Morton'),
364
                      ('M*DS', 'Mathews', 'Matthews'),
365
                      ('M*DSN', 'Madison', 'Madsen', 'Matson', 'Matteson',
366
                       'Mattison', 'Mattson'),
367
                      ('M*KL', 'Michael', 'Michel'),
368
                      ('M*KM', 'Meacham', 'Mechem'),
369
                      # Corrected: RQ*S -> RKS, not KS
370
                      ('M*RKS', 'Marques', 'Marquez', 'Marquis', 'Marquiss'),
371
                      # Corrected: RKS does not compress to KS
372
                      ('M*RKS', 'Marcks', 'Marks', 'Marx'),
373
                      ('M*LN', 'Maloney', 'Moloney', 'Molony'),
374
                      ('M*LN', 'Mullan', 'Mullen', 'Mullin'),
375
                      ('M*LR', 'Mallery', 'Mallory'),
376
                      ('M*LR', 'Moeller', 'Moller', 'Mueller', 'Muller'),
377
                      ('M*LR', 'Millar', 'Miller'),
378
                      ('M*LS', 'Miles', 'Myles'),
379
                      ('M*N', 'Mahan', 'Mann'),
380
                      ('M*NR', 'Miner', 'Minor'),
381
                      ('M*NR', 'Monroe', 'Munro'),
382
                      ('M*NSN', 'Monson', 'Munson'),
383
                      ('M*R', 'Murray', 'Murrey'),
384
                      ('M*R', 'Maher', 'Maier', 'Mayer'),
385
                      ('M*R', 'Mohr', 'Moor', 'Moore'),
386
                      # Corrected: No reason to eliminate final S
387
                      ('M*RS', 'Meyers', 'Myers'),
388
                      ('M*R', 'Meier', 'Meyer', 'Mieir', 'Myhre'),
389
                      ('M*RF', 'Murphey', 'Murphy'),
390
                      ('M*RL', 'Merrell', 'Merrill'),
391
                      ('M*RN', 'Marten', 'Martin', 'Martine', 'Martyn'),
392
                      ('M*RS', 'Meyers', 'Myers'),
393
                      ('M*RS', 'Maurice', 'Morris', 'Morse'),
394
                      ('MK*', 'McCoy', 'McCaughey'),
395
                      ('MK*', 'Magee', 'McGee', 'McGehee', 'McGhie'),
396
                      ('MK*', 'Mackey', 'MacKay', 'Mackie', 'McKay'),
397
                      ('MK*', 'McCue', '!McHugh'),
398
                      ('MK*L', 'Magill', 'McGill'),
399
                      ('MK*LF', 'McCollough', '!McCullah', 'McCullough'),
400
                      ('MK*LM', 'McCallum', 'McCollum', 'McColm'),
401
                      ('MK*N', 'McKenney', 'McKinney'),
402
                      ('MK*NR', 'Macintyre', 'McEntire', 'Mcintire',
403
                       'Mcintyre'),
404
                      ('MK*NS', 'MacKenzie', 'McKenzie'),
405
                      ('MK*NS', 'Maginnis', 'McGinnis', 'McGuinness',
406
                       'Mcinnes', 'Mcinnis'),
407
                      ('MK*R', 'Maguire', 'McGuire'),
408
                      ('MK*R', 'McCarthy', 'McCarty'),
409
                      ('MKD*NL', 'MacDonald', 'McDonald', 'McDonnell'),
410
                      ('MKF*RLN', 'MacFarland', 'MacFarlane', 'McFarland',
411
                       'McFarlane'),
412
                      ('MKF*RSN', 'MacPherson', 'McPherson'),
413
                      ('MKL*D', 'MacLeod', 'McCloud', 'McLeod'),
414
                      ('MKL*KLN', 'MacLachlan', 'Maclachlin', 'McLachlan',
415
                       '!McLaughlin,', '!McLoughlin'),
416
                      ('MKL*LN', 'McClellan', 'McClelland', 'McLellan'),
417
                      ('MKL*N', 'McClain', 'McClaine', 'McLain', 'McLane'),
418
                      ('MKL*N', 'MacLean', 'McClean', 'McLean'),
419
                      ('MKL*S', 'McCloskey', 'McClosky', 'McCluskey'),
420
                      ('MKM*LN', 'MacMillan', 'McMillan', 'McMillin'),
421
                      ('MKN*L', 'MacNeal', 'McNeal', 'McNeil', 'McNeill'),
422
                      ('MKR*D', 'Magrath', 'McGrath'),
423
                      ('N*KL', 'Nichol', 'Nicholl', 'Nickel', 'Nickle',
424
                       'Nicol', 'Nicoll'),
425
                      ('N*KLS', 'Nicholls', 'Nichols', 'Nickels', 'Nickles',
426
                       'Nicols'),
427
                      ('N*KLS', 'Nicholas', 'Nicolas'),
428
                      ('N*KLSN', 'Nicholsen', 'Nicholson', 'Nicolaisen',
429
                       'Nicolson'),
430
                      ('N*KSN', 'Nickson', 'Nixon'),
431
                      ('N*L', 'Neal', 'Neale', 'Neall', 'Neel', 'Neil',
432
                       'Neill'),
433
                      ('N*LSN', 'Neilsen', 'Neilson', 'Nelsen', 'Nelson',
434
                       'Nielsen', 'Nielson,', 'Nilson', 'Nilssen', 'Nilsson'),
435
                      ('N*MN', 'Neumann', 'Newman'),
436
                      ('N*RS', 'Norris', 'Nourse'),
437
                      ('N*SBD', 'Nesbit', 'Nesbitt', 'Nisbet'),
438
                      ('P*D', 'Pettee', 'Petty'),
439
                      ('P*DRSN', 'Peterson', 'Pederson', 'Pedersen',
440
                       'Petersen', 'Petterson'),
441
                      ('P*G', 'Page', 'Paige'),
442
                      ('P*LK', 'Polak', 'Pollack', 'Pollak', 'Pollock'),
443
                      ('P*LSN', 'Polson', 'Paulsen', 'Paulson', 'Poulsen',
444
                       'Poulsson'),
445
                      ('P*N', 'Paine', 'Payn', 'Payne'),
446
                      ('P*R', 'Parry', 'Perry'),
447
                      ('P*R', 'Parr', 'Paar'),
448
                      ('P*RK', 'Park', 'Parke'),
449
                      ('P*RKS', 'Parks', 'Parkes'),
450
                      # Corrected: RC -> R
451
                      ('P*R', 'Pierce', 'Pearce', 'Peirce', '!Piers'),
452
                      ('P*RS', 'Parish', 'Parrish'),
453
                      ('P*RS', 'Paris', 'Parris'),
454
                      ('P*RSN', 'Pierson', 'Pearson', 'Pehrson', 'Peirson'),
455
                      ('PR*KR', 'Prichard', 'Pritchard'),
456
                      ('PR*NS', 'Prince', 'Prinz'),
457
                      ('PR*R', 'Prior', 'Pryor'),
458
                      ('R*', 'Roe', 'Rowe'),
459
                      ('R*', 'Rae', 'Ray', 'Raye', 'Rea', 'Rey', 'Wray'),
460
                      ('R*BNSN', 'Robinson', '!Robison'),
461
                      ('R*D', 'Rothe', 'Roth'),
462
                      ('R*D', 'Rudd', 'Rood', 'Rude'),
463
                      ('R*D', 'Reed', 'Read', 'Reade', 'Reid'),
464
                      ('R*DR', 'Rider', 'Ryder'),
465
                      ('R*DS', 'Rhoades', 'Rhoads', 'Rhodes'),
466
                      ('R*GN', 'Regan', 'Ragon', 'Reagan'),
467
                      # Corrected: No reason to drop final S
468
                      ('R*GRS', 'Rodgers', 'Rogers'),
469
                      ('R*K', 'Richey', 'Ritchey', 'Ritchie'),
470
                      ('R*K', 'Reich', 'Reiche'),
471
                      ('R*KR', 'Reichardt', 'Richert', 'Rickard'),
472
                      ('R*L', 'Reilley', 'Reilly', 'Reilli', 'Riley'),
473
                      # Corrected: T -> D
474
                      ('R*MNGDN', 'Remington', 'Rimington'),
475
                      ('R*MR', 'Reamer', 'Reimer', 'Riemer', 'Rimmer'),
476
                      ('R*MS', 'Ramsay', 'Ramsey'),
477
                      ('R*N', 'Rhein', 'Rhine', 'Ryan'),
478
                      ('R*NR', 'Reinhard', 'Reinhardt', 'Reinhart',
479
                       'Rhinehart', 'Rinehart'),
480
                      ('R*S', 'Reas', 'Reece', 'Rees', 'Reese', 'Reis',
481
                       'Reiss', 'Ries'),
482
                      ('R*S', '!Rauch', 'Rausch', '!Roach', '!Roche', 'Roush'),
483
                      ('R*S', 'Rush', 'Rusch'),
484
                      ('R*S', 'Russ', 'Rus'),
485
                      ('R*VS', 'Reaves', 'Reeves'),
486
                      ('S*BR', 'Seibert', 'Siebert'),
487
                      ('S*FL', 'Schofield', 'Scofield'),
488
                      ('S*FN', 'Stefan', 'Steffan', 'Steffen', 'Stephan',
489
                       'Stephen'),
490
                      ('S*FNS', 'Steffens', 'Stephens', '!Stevens'),
491
                      ('S*FNSN', 'Steffensen', 'Steffenson', 'Stephenson',
492
                       '!Stevenson'),
493
                      ('S*FR', 'Schaefer', 'Schaeffer', 'Schafer', 'Schaffer',
494
                       'Schafer,', 'Shaffer', 'Sheaffer'),
495
                      ('S*FR', 'Stauffer', 'Stouffer'),
496
                      ('S*GL', 'Siegal', 'Sigal'),
497
                      ('S*GLR', 'Sigler', 'Ziegler'),
498
                      ('S*K', 'Schuck', 'Shuck'),
499
                      ('S*KS', 'Sachs', 'Sacks', 'Saks', 'Sax', 'Saxe'),
500
                      ('S*L', 'Seeley', 'Seely', 'Seley'),
501
                      ('S*L', 'Schell', 'Shell'),
502
                      ('S*LR', 'Schuler', 'Schuller'),
503
                      # Corrected: LD -> L precedes T -> D
504
                      ('S*LDS', 'Schultz', 'Schultze', '!Schulz', '!Schulze',
505
                       'Shults', 'Shultz'),
506
                      ('S*LV', 'Silva', 'Sylva'),
507
                      ('S*LVR', 'Silveira', 'Silvera', 'Silveria'),
508
                      ('S*MKR', 'Schomaker', 'Schumacher', 'Schumaker',
509
                       'Shoemaker,', 'Shumaker'),
510
                      ('S*MN', 'Simon', 'Symon'),
511
                      ('S*MN', 'Seaman', 'Seemann', 'Semon'),
512
                      ('S*MRS', 'Somers', 'Sommars', 'Sommers', 'Summers'),
513
                      ('S*MS', 'Simms', 'Sims'),
514
                      ('S*N', 'Stein', 'Stine'),
515
                      ('S*N', 'Sweeney', 'Sweeny', 'Sweney'),
516
                      ('S*NR', 'Senter', 'Center'),
517
                      ('S*NRS', 'Sanders', 'Saunders'),
518
                      ('S*PR', 'Shepard', '!Shephard', '!Shepheard',
519
                       '!Shepherd', 'Sheppard'),
520
                      ('S*R', 'Stahr', 'Star', 'Starr'),
521
                      ('S*R', 'Stewart', 'Stuart'),
522
                      ('S*R', 'Storey', 'Story'),
523
                      ('S*R', 'Saier', 'Sayre'),
524
                      # Corrected: No reason to strip final S
525
                      ('S*RS', 'Schwartz', 'Schwarz', 'Schwarze', 'Swartz'),
526
                      ('S*RL', 'Schirle', 'Shirley'),
527
                      ('S*RLNG', 'Sterling', 'Stirling'),
528
                      ('S*RMN', 'Scheuermann', 'Schurman', 'Sherman'),
529
                      ('S*RN', 'Stearn', 'Stern'),
530
                      ('S*RR', 'Scherer', 'Shearer', 'Sharer', 'Sherer',
531
                       'Sheerer'),
532
                      ('S*S', 'Sousa', 'Souza'),
533
                      ('SM*D', 'Smith', 'Smyth', 'Smythe'),
534
                      ('SM*D', 'Schmid', 'Schmidt', 'Schmit', 'Schmitt',
535
                       'Smit'),
536
                      ('SN*DR', 'Schneider', 'Schnieder', 'Snaider', 'Snider',
537
                       'Snyder'),
538
                      ('SN*L', 'Schnell', 'Snell'),
539
                      ('SP*LNG', 'Spalding', 'Spaulding'),
540
                      ('SP*R', 'Spear', 'Speer', '!Speirer'),
541
                      # Corrected: No reason to strip final S
542
                      ('SP*RS', 'Spears', 'Speers'),
543
                      ('SR*DR', 'Schroder', 'Schroeder', 'Schroeter'),
544
                      ('SR*DR', 'Schrader', 'Shrader'),
545
                      # Corrected: Everywhere else, rule 3 applies to char 1
546
                      ('D*D', 'Tait', 'Tate'),
547
                      ('D*MSN', 'Thomason', '!Thompson', 'Thomsen', 'Thomson',
548
                       'Tomson'),
549
                      ('D*RL', 'Terrel', 'Terrell', 'Terrill'),
550
                      ('DR*S', 'Tracey', 'Tracy'),
551
                      ('V*L', 'Vail', 'Vaile', 'Vale'),
552
                      ('V*L', 'Valley', 'Valle'),
553
                      ('V*R', 'Vieira', 'Vierra'),
554
                      ('W*D', 'White', 'Wight'),
555
                      ('W*DKR', 'Whitacre', 'Whitaker', 'Whiteaker',
556
                       'Whittaker'),
557
                      ('W*DL', 'Whiteley', 'Whitley'),
558
                      ('W*DMN', 'Whitman', 'Wittman'),
559
                      ('W*DR', 'Woodard', 'Woodward'),
560
                      ('W*DRS', 'Waters', 'Watters'),
561
                      ('W*GNR', 'Wagener', 'Waggener', 'Wagoner', 'Wagner',
562
                       'Wegner,', 'Waggoner'),
563
                      ('W*L', 'Willey', 'Willi'),
564
                      ('W*L', 'Wiley', 'Wylie'),
565
                      ('W*L', 'Wahl', 'Wall'),
566
                      ('W*LBR', 'Wilber', 'Wilbur'),
567
                      ('W*LF', 'Wolf', 'Wolfe', 'Wolff', 'Woolf', 'Woulfe',
568
                       'Wulf', 'Wulff'),
569
                      ('W*LKNS', 'Wilkens', 'Wilkins'),
570
                      ('W*LKS', 'Wilkes', 'Wilks'),
571
                      ('W*LN', 'Whalen', 'Whelan'),
572
                      # Corrected: LD -> L precedes T -> D
573
                      ('W*LDR', 'Walter', 'Walther', 'Wolter'),
574
                      ('W*LDRS', 'Walters', 'Walthers', 'Wolters'),
575
                      ('W*LS', 'Wallace', 'Wallis'),
576
                      ('W*LS', 'Welch', 'Welsh'),
577
                      ('W*LS', 'Welles', 'Wells'),
578
                      ('W*LSN', 'Willson', 'Wilson'),
579
                      ('W*N', 'Winn', 'Wynn', 'Wynne'),
580
                      ('W*R', 'Worth', 'Wirth'),
581
                      ('W*R', 'Ware', 'Wear', 'Weir', 'Wier'),
582
                      ('W*RL', 'Wehrle', 'Wehrlie', 'Werle', 'Worley'),
583
                      ('W*RNR', 'Warner', 'Werner'),
584
                      ('W*S', 'Weis', 'Weiss', 'Wiese', 'Wise', 'Wyss'),
585
                      ('W*SMN', 'Weismann', 'Weissman', 'Weseman', 'Wiseman,',
586
                       'Wismonn', 'Wissman'))
587
588
        for tests in test_cases:
589
            result, names = tests[0], tests[1:]
590
            for name in names:
591
                if name[0] == '!':
592
                    self.assertNotEqual(result, dolby(name[1:]))
593
                else:
594
                    self.assertEqual(result, dolby(name))
595
596
        # Additional tests to improve coverage
597
        self.assertEqual(dolby('Rune'), 'R*N')
598
        self.assertEqual(dolby('Rune', keep_vowels=True), 'R*N*')
599
        self.assertEqual(dolby('Rune', vowel_char=''), 'RN')
600
        self.assertEqual(dolby('Rune', vowel_char='A'), 'RAN')
601
        self.assertEqual(dolby('Rune', max_length=2), 'R*')
602
        self.assertEqual(dolby('Rune', max_length=2), 'R*')
603
        self.assertEqual(dolby('Wassermann', max_length=4), 'W*SR')
604
        self.assertEqual(dolby('Wassermanns', max_length=4, keep_vowels=True),
605
                         'W*S*')
606
        self.assertEqual(dolby('Wassermanns'), 'W*SRMNS')
607
608
609
# Script entry point: when this module is executed directly (rather than
# imported by a test runner), hand control to unittest's command-line runner.
if __name__ == '__main__':
610
    unittest.main()
611