1
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
2
|
|
|
|
3
|
|
|
# Copyright 2014-2018 by Christopher C. Little. |
4
|
|
|
# This file is part of Abydos. |
5
|
|
|
# |
6
|
|
|
# Abydos is free software: you can redistribute it and/or modify |
7
|
|
|
# it under the terms of the GNU General Public License as published by |
8
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
# (at your option) any later version. |
10
|
|
|
# |
11
|
|
|
# Abydos is distributed in the hope that it will be useful, |
12
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14
|
|
|
# GNU General Public License for more details. |
15
|
|
|
# |
16
|
|
|
# You should have received a copy of the GNU General Public License |
17
|
|
|
# along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
18
|
|
|
|
19
|
1 |
|
"""abydos.phonetic._phonet. |
20
|
|
|
|
21
|
|
|
The phonetic._phonet module implements the phonet algorithm (a.k.a. Hannoveraner |
22
|
|
|
Phonetik), intended chiefly for German. |
23
|
|
|
""" |
24
|
|
|
|
25
|
1 |
|
from __future__ import unicode_literals |
26
|
|
|
|
27
|
1 |
|
from collections import Counter |
28
|
1 |
|
from unicodedata import normalize as unicode_normalize |
29
|
|
|
|
30
|
1 |
|
from six import text_type |
31
|
1 |
|
from six.moves import range |
32
|
|
|
|
33
|
1 |
|
__all__ = ['phonet'] |
34
|
|
|
|
35
|
|
|
|
36
|
1 |
|
def phonet(word, mode=1, lang='de'): |
37
|
|
|
"""Return the phonet code for a word. |
38
|
|
|
|
39
|
|
|
phonet ("Hannoveraner Phonetik") was developed by Jörg Michael and |
40
|
|
|
documented in :cite:`Michael:1999`. |
41
|
|
|
|
42
|
|
|
This is a port of Jesper Zedlitz's code, which is licensed LGPL |
43
|
|
|
:cite:`Zedlitz:2015`. |
44
|
|
|
|
45
|
|
|
That is, in turn, based on Michael's C code, which is also licensed LGPL |
46
|
|
|
:cite:`Michael:2007`. |
47
|
|
|
|
48
|
|
|
:param str word: the word to transform |
49
|
|
|
:param int mode: the phonet variant to employ (1 or 2) |
50
|
|
|
:param str lang: 'de' (default) for German |
51
|
|
|
'none' for no language |
52
|
|
|
:returns: the phonet value |
53
|
|
|
:rtype: str |
54
|
|
|
|
55
|
|
|
>>> phonet('Christopher') |
56
|
|
|
'KRISTOFA' |
57
|
|
|
>>> phonet('Niall') |
58
|
|
|
'NIAL' |
59
|
|
|
>>> phonet('Smith') |
60
|
|
|
'SMIT' |
61
|
|
|
>>> phonet('Schmidt') |
62
|
|
|
'SHMIT' |
63
|
|
|
|
64
|
|
|
>>> phonet('Christopher', mode=2) |
65
|
|
|
'KRIZTUFA' |
66
|
|
|
>>> phonet('Niall', mode=2) |
67
|
|
|
'NIAL' |
68
|
|
|
>>> phonet('Smith', mode=2) |
69
|
|
|
'ZNIT' |
70
|
|
|
>>> phonet('Schmidt', mode=2) |
71
|
|
|
'ZNIT' |
72
|
|
|
|
73
|
|
|
>>> phonet('Christopher', lang='none') |
74
|
|
|
'CHRISTOPHER' |
75
|
|
|
>>> phonet('Niall', lang='none') |
76
|
|
|
'NIAL' |
77
|
|
|
>>> phonet('Smith', lang='none') |
78
|
|
|
'SMITH' |
79
|
|
|
>>> phonet('Schmidt', lang='none') |
80
|
|
|
'SCHMIDT' |
81
|
|
|
""" |
82
|
1 |
|
_phonet_rules_no_lang = ( # separator chars |
83
|
|
|
# fmt: off |
84
|
|
|
'´', ' ', ' ', |
85
|
|
|
'"', ' ', ' ', |
86
|
|
|
'`$', '', '', |
87
|
|
|
'\'', ' ', ' ', |
88
|
|
|
',', ',', ',', |
89
|
|
|
';', ',', ',', |
90
|
|
|
'-', ' ', ' ', |
91
|
|
|
' ', ' ', ' ', |
92
|
|
|
'.', '.', '.', |
93
|
|
|
':', '.', '.', |
94
|
|
|
# German umlauts |
95
|
|
|
'Ä', 'AE', 'AE', |
96
|
|
|
'Ö', 'OE', 'OE', |
97
|
|
|
'Ü', 'UE', 'UE', |
98
|
|
|
'ß', 'S', 'S', |
99
|
|
|
# international umlauts |
100
|
|
|
'À', 'A', 'A', |
101
|
|
|
'Á', 'A', 'A', |
102
|
|
|
'Â', 'A', 'A', |
103
|
|
|
'Ã', 'A', 'A', |
104
|
|
|
'Å', 'A', 'A', |
105
|
|
|
'Æ', 'AE', 'AE', |
106
|
|
|
'Ç', 'C', 'C', |
107
|
|
|
'Ð', 'DJ', 'DJ', |
108
|
|
|
'È', 'E', 'E', |
109
|
|
|
'É', 'E', 'E', |
110
|
|
|
'Ê', 'E', 'E', |
111
|
|
|
'Ë', 'E', 'E', |
112
|
|
|
'Ì', 'I', 'I', |
113
|
|
|
'Í', 'I', 'I', |
114
|
|
|
'Î', 'I', 'I', |
115
|
|
|
'Ï', 'I', 'I', |
116
|
|
|
'Ñ', 'NH', 'NH', |
117
|
|
|
'Ò', 'O', 'O', |
118
|
|
|
'Ó', 'O', 'O', |
119
|
|
|
'Ô', 'O', 'O', |
120
|
|
|
'Õ', 'O', 'O', |
121
|
|
|
'Œ', 'OE', 'OE', |
122
|
|
|
'Ø', 'OE', 'OE', |
123
|
|
|
'Š', 'SH', 'SH', |
124
|
|
|
'Þ', 'TH', 'TH', |
125
|
|
|
'Ù', 'U', 'U', |
126
|
|
|
'Ú', 'U', 'U', |
127
|
|
|
'Û', 'U', 'U', |
128
|
|
|
'Ý', 'Y', 'Y', |
129
|
|
|
'Ÿ', 'Y', 'Y', |
130
|
|
|
# 'normal' letters (A-Z) |
131
|
|
|
'MC^', 'MAC', 'MAC', |
132
|
|
|
'MC^', 'MAC', 'MAC', |
133
|
|
|
'M´^', 'MAC', 'MAC', |
134
|
|
|
'M\'^', 'MAC', 'MAC', |
135
|
|
|
'O´^', 'O', 'O', |
136
|
|
|
'O\'^', 'O', 'O', |
137
|
|
|
'VAN DEN ^', 'VANDEN', 'VANDEN', |
138
|
|
|
None, None, None |
139
|
|
|
# fmt: on |
140
|
|
|
) |
141
|
|
|
|
142
|
1 |
|
_phonet_rules_german = ( # separator chars |
143
|
|
|
# fmt: off |
144
|
|
|
'´', ' ', ' ', |
145
|
|
|
'"', ' ', ' ', |
146
|
|
|
'`$', '', '', |
147
|
|
|
'\'', ' ', ' ', |
148
|
|
|
',', ' ', ' ', |
149
|
|
|
';', ' ', ' ', |
150
|
|
|
'-', ' ', ' ', |
151
|
|
|
' ', ' ', ' ', |
152
|
|
|
'.', '.', '.', |
153
|
|
|
':', '.', '.', |
154
|
|
|
# German umlauts |
155
|
|
|
'ÄE', 'E', 'E', |
156
|
|
|
'ÄU<', 'EU', 'EU', |
157
|
|
|
'ÄV(AEOU)-<', 'EW', None, |
158
|
|
|
'Ä$', 'Ä', None, |
159
|
|
|
'Ä<', None, 'E', |
160
|
|
|
'Ä', 'E', None, |
161
|
|
|
'ÖE', 'Ö', 'Ö', |
162
|
|
|
'ÖU', 'Ö', 'Ö', |
163
|
|
|
'ÖVER--<', 'ÖW', None, |
164
|
|
|
'ÖV(AOU)-', 'ÖW', None, |
165
|
|
|
'ÜBEL(GNRW)-^^', 'ÜBL ', 'IBL ', |
166
|
|
|
'ÜBER^^', 'ÜBA', 'IBA', |
167
|
|
|
'ÜE', 'Ü', 'I', |
168
|
|
|
'ÜVER--<', 'ÜW', None, |
169
|
|
|
'ÜV(AOU)-', 'ÜW', None, |
170
|
|
|
'Ü', None, 'I', |
171
|
|
|
'ßCH<', None, 'Z', |
172
|
|
|
'ß<', 'S', 'Z', |
173
|
|
|
# international umlauts |
174
|
|
|
'À<', 'A', 'A', |
175
|
|
|
'Á<', 'A', 'A', |
176
|
|
|
'Â<', 'A', 'A', |
177
|
|
|
'Ã<', 'A', 'A', |
178
|
|
|
'Å<', 'A', 'A', |
179
|
|
|
'ÆER-', 'E', 'E', |
180
|
|
|
'ÆU<', 'EU', 'EU', |
181
|
|
|
'ÆV(AEOU)-<', 'EW', None, |
182
|
|
|
'Æ$', 'Ä', None, |
183
|
|
|
'Æ<', None, 'E', |
184
|
|
|
'Æ', 'E', None, |
185
|
|
|
'Ç', 'Z', 'Z', |
186
|
|
|
'ÐÐ-', '', '', |
187
|
|
|
'Ð', 'DI', 'TI', |
188
|
|
|
'È<', 'E', 'E', |
189
|
|
|
'É<', 'E', 'E', |
190
|
|
|
'Ê<', 'E', 'E', |
191
|
|
|
'Ë', 'E', 'E', |
192
|
|
|
'Ì<', 'I', 'I', |
193
|
|
|
'Í<', 'I', 'I', |
194
|
|
|
'Î<', 'I', 'I', |
195
|
|
|
'Ï', 'I', 'I', |
196
|
|
|
'ÑÑ-', '', '', |
197
|
|
|
'Ñ', 'NI', 'NI', |
198
|
|
|
'Ò<', 'O', 'U', |
199
|
|
|
'Ó<', 'O', 'U', |
200
|
|
|
'Ô<', 'O', 'U', |
201
|
|
|
'Õ<', 'O', 'U', |
202
|
|
|
'Œ<', 'Ö', 'Ö', |
203
|
|
|
'Ø(IJY)-<', 'E', 'E', |
204
|
|
|
'Ø<', 'Ö', 'Ö', |
205
|
|
|
'Š', 'SH', 'Z', |
206
|
|
|
'Þ', 'T', 'T', |
207
|
|
|
'Ù<', 'U', 'U', |
208
|
|
|
'Ú<', 'U', 'U', |
209
|
|
|
'Û<', 'U', 'U', |
210
|
|
|
'Ý<', 'I', 'I', |
211
|
|
|
'Ÿ<', 'I', 'I', |
212
|
|
|
# 'normal' letters (A-Z) |
213
|
|
|
'ABELLE$', 'ABL', 'ABL', |
214
|
|
|
'ABELL$', 'ABL', 'ABL', |
215
|
|
|
'ABIENNE$', 'ABIN', 'ABIN', |
216
|
|
|
'ACHME---^', 'ACH', 'AK', |
217
|
|
|
'ACEY$', 'AZI', 'AZI', |
218
|
|
|
'ADV', 'ATW', None, |
219
|
|
|
'AEGL-', 'EK', None, |
220
|
|
|
'AEU<', 'EU', 'EU', |
221
|
|
|
'AE2', 'E', 'E', |
222
|
|
|
'AFTRAUBEN------', 'AFT ', 'AFT ', |
223
|
|
|
'AGL-1', 'AK', None, |
224
|
|
|
'AGNI-^', 'AKN', 'AKN', |
225
|
|
|
'AGNIE-', 'ANI', 'ANI', |
226
|
|
|
'AGN(AEOU)-$', 'ANI', 'ANI', |
227
|
|
|
'AH(AIOÖUÜY)-', 'AH', None, |
228
|
|
|
'AIA2', 'AIA', 'AIA', |
229
|
|
|
'AIE$', 'E', 'E', |
230
|
|
|
'AILL(EOU)-', 'ALI', 'ALI', |
231
|
|
|
'AINE$', 'EN', 'EN', |
232
|
|
|
'AIRE$', 'ER', 'ER', |
233
|
|
|
'AIR-', 'E', 'E', |
234
|
|
|
'AISE$', 'ES', 'EZ', |
235
|
|
|
'AISSANCE$', 'ESANS', 'EZANZ', |
236
|
|
|
'AISSE$', 'ES', 'EZ', |
237
|
|
|
'AIX$', 'EX', 'EX', |
238
|
|
|
'AJ(AÄEÈÉÊIOÖUÜ)--', 'A', 'A', |
239
|
|
|
'AKTIE', 'AXIE', 'AXIE', |
240
|
|
|
'AKTUEL', 'AKTUEL', None, |
241
|
|
|
'ALOI^', 'ALOI', 'ALUI', # Don't merge these rules |
242
|
|
|
'ALOY^', 'ALOI', 'ALUI', # needed by 'check_rules' |
243
|
|
|
'AMATEU(RS)-', 'AMATÖ', 'ANATÖ', |
244
|
|
|
'ANCH(OEI)-', 'ANSH', 'ANZ', |
245
|
|
|
'ANDERGEGANG----', 'ANDA GE', 'ANTA KE', |
246
|
|
|
'ANDERGEHE----', 'ANDA ', 'ANTA ', |
247
|
|
|
'ANDERGESETZ----', 'ANDA GE', 'ANTA KE', |
248
|
|
|
'ANDERGING----', 'ANDA ', 'ANTA ', |
249
|
|
|
'ANDERSETZ(ET)-----', 'ANDA ', 'ANTA ', |
250
|
|
|
'ANDERZUGEHE----', 'ANDA ZU ', 'ANTA ZU ', |
251
|
|
|
'ANDERZUSETZE-----', 'ANDA ZU ', 'ANTA ZU ', |
252
|
|
|
'ANER(BKO)---^^', 'AN', None, |
253
|
|
|
'ANHAND---^$', 'AN H', 'AN ', |
254
|
|
|
'ANH(AÄEIOÖUÜY)--^^', 'AN', None, |
255
|
|
|
'ANIELLE$', 'ANIEL', 'ANIL', |
256
|
|
|
'ANIEL', 'ANIEL', None, |
257
|
|
|
'ANSTELLE----^$', 'AN ST', 'AN ZT', |
258
|
|
|
'ANTI^^', 'ANTI', 'ANTI', |
259
|
|
|
'ANVER^^', 'ANFA', 'ANFA', |
260
|
|
|
'ATIA$', 'ATIA', 'ATIA', |
261
|
|
|
'ATIA(NS)--', 'ATI', 'ATI', |
262
|
|
|
'ATI(AÄOÖUÜ)-', 'AZI', 'AZI', |
263
|
|
|
'AUAU--', '', '', |
264
|
|
|
'AUERE$', 'AUERE', None, |
265
|
|
|
'AUERE(NS)-$', 'AUERE', None, |
266
|
|
|
'AUERE(AIOUY)--', 'AUER', None, |
267
|
|
|
'AUER(AÄIOÖUÜY)-', 'AUER', None, |
268
|
|
|
'AUER<', 'AUA', 'AUA', |
269
|
|
|
'AUF^^', 'AUF', 'AUF', |
270
|
|
|
'AULT$', 'O', 'U', |
271
|
|
|
'AUR(BCDFGKLMNQSTVWZ)-', 'AUA', 'AUA', |
272
|
|
|
'AUR$', 'AUA', 'AUA', |
273
|
|
|
'AUSSE$', 'OS', 'UZ', |
274
|
|
|
'AUS(ST)-^', 'AUS', 'AUS', |
275
|
|
|
'AUS^^', 'AUS', 'AUS', |
276
|
|
|
'AUTOFAHR----', 'AUTO ', 'AUTU ', |
277
|
|
|
'AUTO^^', 'AUTO', 'AUTU', |
278
|
|
|
'AUX(IY)-', 'AUX', 'AUX', |
279
|
|
|
'AUX', 'O', 'U', |
280
|
|
|
'AU', 'AU', 'AU', |
281
|
|
|
'AVER--<', 'AW', None, |
282
|
|
|
'AVIER$', 'AWIE', 'AFIE', |
283
|
|
|
'AV(EÈÉÊI)-^', 'AW', None, |
284
|
|
|
'AV(AOU)-', 'AW', None, |
285
|
|
|
'AYRE$', 'EIRE', 'EIRE', |
286
|
|
|
'AYRE(NS)-$', 'EIRE', 'EIRE', |
287
|
|
|
'AYRE(AIOUY)--', 'EIR', 'EIR', |
288
|
|
|
'AYR(AÄIOÖUÜY)-', 'EIR', 'EIR', |
289
|
|
|
'AYR<', 'EIA', 'EIA', |
290
|
|
|
'AYER--<', 'EI', 'EI', |
291
|
|
|
'AY(AÄEIOÖUÜY)--', 'A', 'A', |
292
|
|
|
'AË', 'E', 'E', |
293
|
|
|
'A(IJY)<', 'EI', 'EI', |
294
|
|
|
'BABY^$', 'BEBI', 'BEBI', |
295
|
|
|
'BAB(IY)^', 'BEBI', 'BEBI', |
296
|
|
|
'BEAU^$', 'BO', None, |
297
|
|
|
'BEA(BCMNRU)-^', 'BEA', 'BEA', |
298
|
|
|
'BEAT(AEIMORU)-^', 'BEAT', 'BEAT', |
299
|
|
|
'BEE$', 'BI', 'BI', |
300
|
|
|
'BEIGE^$', 'BESH', 'BEZ', |
301
|
|
|
'BENOIT--', 'BENO', 'BENU', |
302
|
|
|
'BER(DT)-', 'BER', None, |
303
|
|
|
'BERN(DT)-', 'BERN', None, |
304
|
|
|
'BE(LMNRST)-^', 'BE', 'BE', |
305
|
|
|
'BETTE$', 'BET', 'BET', |
306
|
|
|
'BEVOR^$', 'BEFOR', None, |
307
|
|
|
'BIC$', 'BIZ', 'BIZ', |
308
|
|
|
'BOWL(EI)-', 'BOL', 'BUL', |
309
|
|
|
'BP(AÄEÈÉÊIÌÍÎOÖRUÜY)-', 'B', 'B', |
310
|
|
|
'BRINGEND-----^', 'BRI', 'BRI', |
311
|
|
|
'BRINGEND-----', ' BRI', ' BRI', |
312
|
|
|
'BROW(NS)-', 'BRAU', 'BRAU', |
313
|
|
|
'BUDGET7', 'BÜGE', 'BIKE', |
314
|
|
|
'BUFFET7', 'BÜFE', 'BIFE', |
315
|
|
|
'BYLLE$', 'BILE', 'BILE', |
316
|
|
|
'BYLL$', 'BIL', 'BIL', |
317
|
|
|
'BYPA--^', 'BEI', 'BEI', |
318
|
|
|
'BYTE<', 'BEIT', 'BEIT', |
319
|
|
|
'BY9^', 'BÜ', None, |
320
|
|
|
'B(SßZ)$', 'BS', None, |
321
|
|
|
'CACH(EI)-^', 'KESH', 'KEZ', |
322
|
|
|
'CAE--', 'Z', 'Z', |
323
|
|
|
'CA(IY)$', 'ZEI', 'ZEI', |
324
|
|
|
'CE(EIJUY)--', 'Z', 'Z', |
325
|
|
|
'CENT<', 'ZENT', 'ZENT', |
326
|
|
|
'CERST(EI)----^', 'KE', 'KE', |
327
|
|
|
'CER$', 'ZA', 'ZA', |
328
|
|
|
'CE3', 'ZE', 'ZE', |
329
|
|
|
'CH\'S$', 'X', 'X', |
330
|
|
|
'CH´S$', 'X', 'X', |
331
|
|
|
'CHAO(ST)-', 'KAO', 'KAU', |
332
|
|
|
'CHAMPIO-^', 'SHEMPI', 'ZENBI', |
333
|
|
|
'CHAR(AI)-^', 'KAR', 'KAR', |
334
|
|
|
'CHAU(CDFSVWXZ)-', 'SHO', 'ZU', |
335
|
|
|
'CHÄ(CF)-', 'SHE', 'ZE', |
336
|
|
|
'CHE(CF)-', 'SHE', 'ZE', |
337
|
|
|
'CHEM-^', 'KE', 'KE', # or: 'CHE', 'KE' |
338
|
|
|
'CHEQUE<', 'SHEK', 'ZEK', |
339
|
|
|
'CHI(CFGPVW)-', 'SHI', 'ZI', |
340
|
|
|
'CH(AEUY)-<^', 'SH', 'Z', |
341
|
|
|
'CHK-', '', '', |
342
|
|
|
'CHO(CKPS)-^', 'SHO', 'ZU', |
343
|
|
|
'CHRIS-', 'KRI', None, |
344
|
|
|
'CHRO-', 'KR', None, |
345
|
|
|
'CH(LOR)-<^', 'K', 'K', |
346
|
|
|
'CHST-', 'X', 'X', |
347
|
|
|
'CH(SßXZ)3', 'X', 'X', |
348
|
|
|
'CHTNI-3', 'CHN', 'KN', |
349
|
|
|
'CH^', 'K', 'K', # or: 'CH', 'K' |
350
|
|
|
'CH', 'CH', 'K', |
351
|
|
|
'CIC$', 'ZIZ', 'ZIZ', |
352
|
|
|
'CIENCEFICT----', 'EIENS ', 'EIENZ ', |
353
|
|
|
'CIENCE$', 'EIENS', 'EIENZ', |
354
|
|
|
'CIER$', 'ZIE', 'ZIE', |
355
|
|
|
'CYB-^', 'ZEI', 'ZEI', |
356
|
|
|
'CY9^', 'ZÜ', 'ZI', |
357
|
|
|
'C(IJY)-<3', 'Z', 'Z', |
358
|
|
|
'CLOWN-', 'KLAU', 'KLAU', |
359
|
|
|
'CCH', 'Z', 'Z', |
360
|
|
|
'CCE-', 'X', 'X', |
361
|
|
|
'C(CK)-', '', '', |
362
|
|
|
'CLAUDET---', 'KLO', 'KLU', |
363
|
|
|
'CLAUDINE^$', 'KLODIN', 'KLUTIN', |
364
|
|
|
'COACH', 'KOSH', 'KUZ', |
365
|
|
|
'COLE$', 'KOL', 'KUL', |
366
|
|
|
'COUCH', 'KAUSH', 'KAUZ', |
367
|
|
|
'COW', 'KAU', 'KAU', |
368
|
|
|
'CQUES$', 'K', 'K', |
369
|
|
|
'CQUE', 'K', 'K', |
370
|
|
|
'CRASH--9', 'KRE', 'KRE', |
371
|
|
|
'CREAT-^', 'KREA', 'KREA', |
372
|
|
|
'CST', 'XT', 'XT', |
373
|
|
|
'CS<^', 'Z', 'Z', |
374
|
|
|
'C(SßX)', 'X', 'X', |
375
|
|
|
'CT\'S$', 'X', 'X', |
376
|
|
|
'CT(SßXZ)', 'X', 'X', |
377
|
|
|
'CZ<', 'Z', 'Z', |
378
|
|
|
'C(ÈÉÊÌÍÎÝ)3', 'Z', 'Z', |
379
|
|
|
'C.^', 'C.', 'C.', |
380
|
|
|
'CÄ-', 'Z', 'Z', |
381
|
|
|
'CÜ$', 'ZÜ', 'ZI', |
382
|
|
|
'C\'S$', 'X', 'X', |
383
|
|
|
'C<', 'K', 'K', |
384
|
|
|
'DAHER^$', 'DAHER', None, |
385
|
|
|
'DARAUFFOLGE-----', 'DARAUF ', 'TARAUF ', |
386
|
|
|
'DAVO(NR)-^$', 'DAFO', 'TAFU', |
387
|
|
|
'DD(SZ)--<', '', '', |
388
|
|
|
'DD9', 'D', None, |
389
|
|
|
'DEPOT7', 'DEPO', 'TEBU', |
390
|
|
|
'DESIGN', 'DISEIN', 'TIZEIN', |
391
|
|
|
'DE(LMNRST)-3^', 'DE', 'TE', |
392
|
|
|
'DETTE$', 'DET', 'TET', |
393
|
|
|
'DH$', 'T', None, |
394
|
|
|
'DIC$', 'DIZ', 'TIZ', |
395
|
|
|
'DIDR-^', 'DIT', None, |
396
|
|
|
'DIEDR-^', 'DIT', None, |
397
|
|
|
'DJ(AEIOU)-^', 'I', 'I', |
398
|
|
|
'DMITR-^', 'DIMIT', 'TINIT', |
399
|
|
|
'DRY9^', 'DRÜ', None, |
400
|
|
|
'DT-', '', '', |
401
|
|
|
'DUIS-^', 'DÜ', 'TI', |
402
|
|
|
'DURCH^^', 'DURCH', 'TURK', |
403
|
|
|
'DVA$', 'TWA', None, |
404
|
|
|
'DY9^', 'DÜ', None, |
405
|
|
|
'DYS$', 'DIS', None, |
406
|
|
|
'DS(CH)--<', 'T', 'T', |
407
|
|
|
'DST', 'ZT', 'ZT', |
408
|
|
|
'DZS(CH)--', 'T', 'T', |
409
|
|
|
'D(SßZ)', 'Z', 'Z', |
410
|
|
|
'D(AÄEIOÖRUÜY)-', 'D', None, |
411
|
|
|
'D(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'D', None, |
412
|
|
|
'D\'H^', 'D', 'T', |
413
|
|
|
'D´H^', 'D', 'T', |
414
|
|
|
'D`H^', 'D', 'T', |
415
|
|
|
'D\'S3$', 'Z', 'Z', |
416
|
|
|
'D´S3$', 'Z', 'Z', |
417
|
|
|
'D^', 'D', None, |
418
|
|
|
'D', 'T', 'T', |
419
|
|
|
'EAULT$', 'O', 'U', |
420
|
|
|
'EAUX$', 'O', 'U', |
421
|
|
|
'EAU', 'O', 'U', |
422
|
|
|
'EAV', 'IW', 'IF', |
423
|
|
|
'EAS3$', 'EAS', None, |
424
|
|
|
'EA(AÄEIOÖÜY)-3', 'EA', 'EA', |
425
|
|
|
'EA3$', 'EA', 'EA', |
426
|
|
|
'EA3', 'I', 'I', |
427
|
|
|
'EBENSO^$', 'EBNSO', 'EBNZU', |
428
|
|
|
'EBENSO^^', 'EBNSO ', 'EBNZU ', |
429
|
|
|
'EBEN^^', 'EBN', 'EBN', |
430
|
|
|
'EE9', 'E', 'E', |
431
|
|
|
'EGL-1', 'EK', None, |
432
|
|
|
'EHE(IUY)--1', 'EH', None, |
433
|
|
|
'EHUNG---1', 'E', None, |
434
|
|
|
'EH(AÄIOÖUÜY)-1', 'EH', None, |
435
|
|
|
'EIEI--', '', '', |
436
|
|
|
'EIERE^$', 'EIERE', None, |
437
|
|
|
'EIERE$', 'EIERE', None, |
438
|
|
|
'EIERE(NS)-$', 'EIERE', None, |
439
|
|
|
'EIERE(AIOUY)--', 'EIER', None, |
440
|
|
|
'EIER(AÄIOÖUÜY)-', 'EIER', None, |
441
|
|
|
'EIER<', 'EIA', None, |
442
|
|
|
'EIGL-1', 'EIK', None, |
443
|
|
|
'EIGH$', 'EI', 'EI', |
444
|
|
|
'EIH--', 'E', 'E', |
445
|
|
|
'EILLE$', 'EI', 'EI', |
446
|
|
|
'EIR(BCDFGKLMNQSTVWZ)-', 'EIA', 'EIA', |
447
|
|
|
'EIR$', 'EIA', 'EIA', |
448
|
|
|
'EITRAUBEN------', 'EIT ', 'EIT ', |
449
|
|
|
'EI', 'EI', 'EI', |
450
|
|
|
'EJ$', 'EI', 'EI', |
451
|
|
|
'ELIZ^', 'ELIS', None, |
452
|
|
|
'ELZ^', 'ELS', None, |
453
|
|
|
'EL-^', 'E', 'E', |
454
|
|
|
'ELANG----1', 'E', 'E', |
455
|
|
|
'EL(DKL)--1', 'E', 'E', |
456
|
|
|
'EL(MNT)--1$', 'E', 'E', |
457
|
|
|
'ELYNE$', 'ELINE', 'ELINE', |
458
|
|
|
'ELYN$', 'ELIN', 'ELIN', |
459
|
|
|
'EL(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'EL', 'EL', |
460
|
|
|
'EL-1', 'L', 'L', |
461
|
|
|
'EM-^', None, 'E', |
462
|
|
|
'EM(DFKMPQT)--1', None, 'E', |
463
|
|
|
'EM(AÄEÈÉÊIÌÍÎOÖUÜY)--1', None, 'E', |
464
|
|
|
'EM-1', None, 'N', |
465
|
|
|
'ENGAG-^', 'ANGA', 'ANKA', |
466
|
|
|
'EN-^', 'E', 'E', |
467
|
|
|
'ENTUEL', 'ENTUEL', None, |
468
|
|
|
'EN(CDGKQSTZ)--1', 'E', 'E', |
469
|
|
|
'EN(AÄEÈÉÊIÌÍÎNOÖUÜY)-1', 'EN', 'EN', |
470
|
|
|
'EN-1', '', '', |
471
|
|
|
'ERH(AÄEIOÖUÜ)-^', 'ERH', 'ER', |
472
|
|
|
'ER-^', 'E', 'E', |
473
|
|
|
'ERREGEND-----', ' ER', ' ER', |
474
|
|
|
'ERT1$', 'AT', None, |
475
|
|
|
'ER(DGLKMNRQTZß)-1', 'ER', None, |
476
|
|
|
'ER(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'ER', 'A', |
477
|
|
|
'ER1$', 'A', 'A', |
478
|
|
|
'ER<1', 'A', 'A', |
479
|
|
|
'ETAT7', 'ETA', 'ETA', |
480
|
|
|
'ETI(AÄOÖÜU)-', 'EZI', 'EZI', |
481
|
|
|
'EUERE$', 'EUERE', None, |
482
|
|
|
'EUERE(NS)-$', 'EUERE', None, |
483
|
|
|
'EUERE(AIOUY)--', 'EUER', None, |
484
|
|
|
'EUER(AÄIOÖUÜY)-', 'EUER', None, |
485
|
|
|
'EUER<', 'EUA', None, |
486
|
|
|
'EUEU--', '', '', |
487
|
|
|
'EUILLE$', 'Ö', 'Ö', |
488
|
|
|
'EUR$', 'ÖR', 'ÖR', |
489
|
|
|
'EUX', 'Ö', 'Ö', |
490
|
|
|
'EUSZ$', 'EUS', None, |
491
|
|
|
'EUTZ$', 'EUS', None, |
492
|
|
|
'EUYS$', 'EUS', 'EUZ', |
493
|
|
|
'EUZ$', 'EUS', None, |
494
|
|
|
'EU', 'EU', 'EU', |
495
|
|
|
'EVER--<1', 'EW', None, |
496
|
|
|
'EV(ÄOÖUÜ)-1', 'EW', None, |
497
|
|
|
'EYER<', 'EIA', 'EIA', |
498
|
|
|
'EY<', 'EI', 'EI', |
499
|
|
|
'FACETTE', 'FASET', 'FAZET', |
500
|
|
|
'FANS--^$', 'FE', 'FE', |
501
|
|
|
'FAN-^$', 'FE', 'FE', |
502
|
|
|
'FAULT-', 'FOL', 'FUL', |
503
|
|
|
'FEE(DL)-', 'FI', 'FI', |
504
|
|
|
'FEHLER', 'FELA', 'FELA', |
505
|
|
|
'FE(LMNRST)-3^', 'FE', 'FE', |
506
|
|
|
'FOERDERN---^', 'FÖRD', 'FÖRT', |
507
|
|
|
'FOERDERN---', ' FÖRD', ' FÖRT', |
508
|
|
|
'FOND7', 'FON', 'FUN', |
509
|
|
|
'FRAIN$', 'FRA', 'FRA', |
510
|
|
|
'FRISEU(RS)-', 'FRISÖ', 'FRIZÖ', |
511
|
|
|
'FY9^', 'FÜ', None, |
512
|
|
|
'FÖRDERN---^', 'FÖRD', 'FÖRT', |
513
|
|
|
'FÖRDERN---', ' FÖRD', ' FÖRT', |
514
|
|
|
'GAGS^$', 'GEX', 'KEX', |
515
|
|
|
'GAG^$', 'GEK', 'KEK', |
516
|
|
|
'GD', 'KT', 'KT', |
517
|
|
|
'GEGEN^^', 'GEGN', 'KEKN', |
518
|
|
|
'GEGENGEKOM-----', 'GEGN ', 'KEKN ', |
519
|
|
|
'GEGENGESET-----', 'GEGN ', 'KEKN ', |
520
|
|
|
'GEGENKOMME-----', 'GEGN ', 'KEKN ', |
521
|
|
|
'GEGENZUKOM---', 'GEGN ZU ', 'KEKN ZU ', |
522
|
|
|
'GENDETWAS-----$', 'GENT ', 'KENT ', |
523
|
|
|
'GENRE', 'IORE', 'IURE', |
524
|
|
|
'GE(LMNRST)-3^', 'GE', 'KE', |
525
|
|
|
'GER(DKT)-', 'GER', None, |
526
|
|
|
'GETTE$', 'GET', 'KET', |
527
|
|
|
'GGF.', 'GF.', None, |
528
|
|
|
'GG-', '', '', |
529
|
|
|
'GH', 'G', None, |
530
|
|
|
'GI(AOU)-^', 'I', 'I', |
531
|
|
|
'GION-3', 'KIO', 'KIU', |
532
|
|
|
'G(CK)-', '', '', |
533
|
|
|
'GJ(AEIOU)-^', 'I', 'I', |
534
|
|
|
'GMBH^$', 'GMBH', 'GMBH', |
535
|
|
|
'GNAC$', 'NIAK', 'NIAK', |
536
|
|
|
'GNON$', 'NION', 'NIUN', |
537
|
|
|
'GN$', 'N', 'N', |
538
|
|
|
'GONCAL-^', 'GONZA', 'KUNZA', |
539
|
|
|
'GRY9^', 'GRÜ', None, |
540
|
|
|
'G(SßXZ)-<', 'K', 'K', |
541
|
|
|
'GUCK-', 'KU', 'KU', |
542
|
|
|
'GUISEP-^', 'IUSE', 'IUZE', |
543
|
|
|
'GUI-^', 'G', 'K', |
544
|
|
|
'GUTAUSSEH------^', 'GUT ', 'KUT ', |
545
|
|
|
'GUTGEHEND------^', 'GUT ', 'KUT ', |
546
|
|
|
'GY9^', 'GÜ', None, |
547
|
|
|
'G(AÄEILOÖRUÜY)-', 'G', None, |
548
|
|
|
'G(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'G', None, |
549
|
|
|
'G\'S$', 'X', 'X', |
550
|
|
|
'G´S$', 'X', 'X', |
551
|
|
|
'G^', 'G', None, |
552
|
|
|
'G', 'K', 'K', |
553
|
|
|
'HA(HIUY)--1', 'H', None, |
554
|
|
|
'HANDVOL---^', 'HANT ', 'ANT ', |
555
|
|
|
'HANNOVE-^', 'HANOF', None, |
556
|
|
|
'HAVEN7$', 'HAFN', None, |
557
|
|
|
'HEAD-', 'HE', 'E', |
558
|
|
|
'HELIEGEN------', 'E ', 'E ', |
559
|
|
|
'HESTEHEN------', 'E ', 'E ', |
560
|
|
|
'HE(LMNRST)-3^', 'HE', 'E', |
561
|
|
|
'HE(LMN)-1', 'E', 'E', |
562
|
|
|
'HEUR1$', 'ÖR', 'ÖR', |
563
|
|
|
'HE(HIUY)--1', 'H', None, |
564
|
|
|
'HIH(AÄEIOÖUÜY)-1', 'IH', None, |
565
|
|
|
'HLH(AÄEIOÖUÜY)-1', 'LH', None, |
566
|
|
|
'HMH(AÄEIOÖUÜY)-1', 'MH', None, |
567
|
|
|
'HNH(AÄEIOÖUÜY)-1', 'NH', None, |
568
|
|
|
'HOBBY9^', 'HOBI', None, |
569
|
|
|
'HOCHBEGAB-----^', 'HOCH ', 'UK ', |
570
|
|
|
'HOCHTALEN-----^', 'HOCH ', 'UK ', |
571
|
|
|
'HOCHZUFRI-----^', 'HOCH ', 'UK ', |
572
|
|
|
'HO(HIY)--1', 'H', None, |
573
|
|
|
'HRH(AÄEIOÖUÜY)-1', 'RH', None, |
574
|
|
|
'HUH(AÄEIOÖUÜY)-1', 'UH', None, |
575
|
|
|
'HUIS^^', 'HÜS', 'IZ', |
576
|
|
|
'HUIS$', 'ÜS', 'IZ', |
577
|
|
|
'HUI--1', 'H', None, |
578
|
|
|
'HYGIEN^', 'HÜKIEN', None, |
579
|
|
|
'HY9^', 'HÜ', None, |
580
|
|
|
'HY(BDGMNPST)-', 'Ü', None, |
581
|
|
|
'H.^', None, 'H.', |
582
|
|
|
'HÄU--1', 'H', None, |
583
|
|
|
'H^', 'H', '', |
584
|
|
|
'H', '', '', |
585
|
|
|
'ICHELL---', 'ISH', 'IZ', |
586
|
|
|
'ICHI$', 'ISHI', 'IZI', |
587
|
|
|
'IEC$', 'IZ', 'IZ', |
588
|
|
|
'IEDENSTELLE------', 'IDN ', 'ITN ', |
589
|
|
|
'IEI-3', '', '', |
590
|
|
|
'IELL3', 'IEL', 'IEL', |
591
|
|
|
'IENNE$', 'IN', 'IN', |
592
|
|
|
'IERRE$', 'IER', 'IER', |
593
|
|
|
'IERZULAN---', 'IR ZU ', 'IR ZU ', |
594
|
|
|
'IETTE$', 'IT', 'IT', |
595
|
|
|
'IEU', 'IÖ', 'IÖ', |
596
|
|
|
'IE<4', 'I', 'I', |
597
|
|
|
'IGL-1', 'IK', None, |
598
|
|
|
'IGHT3$', 'EIT', 'EIT', |
599
|
|
|
'IGNI(EO)-', 'INI', 'INI', |
600
|
|
|
'IGN(AEOU)-$', 'INI', 'INI', |
601
|
|
|
'IHER(DGLKRT)--1', 'IHE', None, |
602
|
|
|
'IHE(IUY)--', 'IH', None, |
603
|
|
|
'IH(AIOÖUÜY)-', 'IH', None, |
604
|
|
|
'IJ(AOU)-', 'I', 'I', |
605
|
|
|
'IJ$', 'I', 'I', |
606
|
|
|
'IJ<', 'EI', 'EI', |
607
|
|
|
'IKOLE$', 'IKOL', 'IKUL', |
608
|
|
|
'ILLAN(STZ)--4', 'ILIA', 'ILIA', |
609
|
|
|
'ILLAR(DT)--4', 'ILIA', 'ILIA', |
610
|
|
|
'IMSTAN----^', 'IM ', 'IN ', |
611
|
|
|
'INDELERREGE------', 'INDL ', 'INTL ', |
612
|
|
|
'INFRAGE-----^$', 'IN ', 'IN ', |
613
|
|
|
'INTERN(AOU)-^', 'INTAN', 'INTAN', |
614
|
|
|
'INVER-', 'INWE', 'INFE', |
615
|
|
|
'ITI(AÄIOÖUÜ)-', 'IZI', 'IZI', |
616
|
|
|
'IUSZ$', 'IUS', None, |
617
|
|
|
'IUTZ$', 'IUS', None, |
618
|
|
|
'IUZ$', 'IUS', None, |
619
|
|
|
'IVER--<', 'IW', None, |
620
|
|
|
'IVIER$', 'IWIE', 'IFIE', |
621
|
|
|
'IV(ÄOÖUÜ)-', 'IW', None, |
622
|
|
|
'IV<3', 'IW', None, |
623
|
|
|
'IY2', 'I', None, |
624
|
|
|
'I(ÈÉÊ)<4', 'I', 'I', |
625
|
|
|
'JAVIE---<^', 'ZA', 'ZA', |
626
|
|
|
'JEANS^$', 'JINS', 'INZ', |
627
|
|
|
'JEANNE^$', 'IAN', 'IAN', |
628
|
|
|
'JEAN-^', 'IA', 'IA', |
629
|
|
|
'JER-^', 'IE', 'IE', |
630
|
|
|
'JE(LMNST)-', 'IE', 'IE', |
631
|
|
|
'JI^', 'JI', None, |
632
|
|
|
'JOR(GK)^$', 'IÖRK', 'IÖRK', |
633
|
|
|
'J', 'I', 'I', |
634
|
|
|
'KC(ÄEIJ)-', 'X', 'X', |
635
|
|
|
'KD', 'KT', None, |
636
|
|
|
'KE(LMNRST)-3^', 'KE', 'KE', |
637
|
|
|
'KG(AÄEILOÖRUÜY)-', 'K', None, |
638
|
|
|
'KH<^', 'K', 'K', |
639
|
|
|
'KIC$', 'KIZ', 'KIZ', |
640
|
|
|
'KLE(LMNRST)-3^', 'KLE', 'KLE', |
641
|
|
|
'KOTELE-^', 'KOTL', 'KUTL', |
642
|
|
|
'KREAT-^', 'KREA', 'KREA', |
643
|
|
|
'KRÜS(TZ)--^', 'KRI', None, |
644
|
|
|
'KRYS(TZ)--^', 'KRI', None, |
645
|
|
|
'KRY9^', 'KRÜ', None, |
646
|
|
|
'KSCH---', 'K', 'K', |
647
|
|
|
'KSH--', 'K', 'K', |
648
|
|
|
'K(SßXZ)7', 'X', 'X', # implies 'KST' -> 'XT' |
649
|
|
|
'KT\'S$', 'X', 'X', |
650
|
|
|
'KTI(AIOU)-3', 'XI', 'XI', |
651
|
|
|
'KT(SßXZ)', 'X', 'X', |
652
|
|
|
'KY9^', 'KÜ', None, |
653
|
|
|
'K\'S$', 'X', 'X', |
654
|
|
|
'K´S$', 'X', 'X', |
655
|
|
|
'LANGES$', ' LANGES', ' LANKEZ', |
656
|
|
|
'LANGE$', ' LANGE', ' LANKE', |
657
|
|
|
'LANG$', ' LANK', ' LANK', |
658
|
|
|
'LARVE-', 'LARF', 'LARF', |
659
|
|
|
'LD(SßZ)$', 'LS', 'LZ', |
660
|
|
|
'LD\'S$', 'LS', 'LZ', |
661
|
|
|
'LD´S$', 'LS', 'LZ', |
662
|
|
|
'LEAND-^', 'LEAN', 'LEAN', |
663
|
|
|
'LEERSTEHE-----^', 'LER ', 'LER ', |
664
|
|
|
'LEICHBLEIB-----', 'LEICH ', 'LEIK ', |
665
|
|
|
'LEICHLAUTE-----', 'LEICH ', 'LEIK ', |
666
|
|
|
'LEIDERREGE------', 'LEIT ', 'LEIT ', |
667
|
|
|
'LEIDGEPR----^', 'LEIT ', 'LEIT ', |
668
|
|
|
'LEINSTEHE-----', 'LEIN ', 'LEIN ', |
669
|
|
|
'LEL-', 'LE', 'LE', |
670
|
|
|
'LE(MNRST)-3^', 'LE', 'LE', |
671
|
|
|
'LETTE$', 'LET', 'LET', |
672
|
|
|
'LFGNAG-', 'LFGAN', 'LFKAN', |
673
|
|
|
'LICHERWEIS----', 'LICHA ', 'LIKA ', |
674
|
|
|
'LIC$', 'LIZ', 'LIZ', |
675
|
|
|
'LIVE^$', 'LEIF', 'LEIF', |
676
|
|
|
'LT(SßZ)$', 'LS', 'LZ', |
677
|
|
|
'LT\'S$', 'LS', 'LZ', |
678
|
|
|
'LT´S$', 'LS', 'LZ', |
679
|
|
|
'LUI(GS)--', 'LU', 'LU', |
680
|
|
|
'LV(AIO)-', 'LW', None, |
681
|
|
|
'LY9^', 'LÜ', None, |
682
|
|
|
'LSTS$', 'LS', 'LZ', |
683
|
|
|
'LZ(BDFGKLMNPQRSTVWX)-', 'LS', None, |
684
|
|
|
'L(SßZ)$', 'LS', None, |
685
|
|
|
'MAIR-<', 'MEI', 'NEI', |
686
|
|
|
'MANAG-', 'MENE', 'NENE', |
687
|
|
|
'MANUEL', 'MANUEL', None, |
688
|
|
|
'MASSEU(RS)-', 'MASÖ', 'NAZÖ', |
689
|
|
|
'MATCH', 'MESH', 'NEZ', |
690
|
|
|
'MAURICE', 'MORIS', 'NURIZ', |
691
|
|
|
'MBH^$', 'MBH', 'MBH', |
692
|
|
|
'MB(ßZ)$', 'MS', None, |
693
|
|
|
'MB(SßTZ)-', 'M', 'N', |
694
|
|
|
'MCG9^', 'MAK', 'NAK', |
695
|
|
|
'MC9^', 'MAK', 'NAK', |
696
|
|
|
'MEMOIR-^', 'MEMOA', 'NENUA', |
697
|
|
|
'MERHAVEN$', 'MAHAFN', None, |
698
|
|
|
'ME(LMNRST)-3^', 'ME', 'NE', |
699
|
|
|
'MEN(STZ)--3', 'ME', None, |
700
|
|
|
'MEN$', 'MEN', None, |
701
|
|
|
'MIGUEL-', 'MIGE', 'NIKE', |
702
|
|
|
'MIKE^$', 'MEIK', 'NEIK', |
703
|
|
|
'MITHILFE----^$', 'MIT H', 'NIT ', |
704
|
|
|
'MN$', 'M', None, |
705
|
|
|
'MN', 'N', 'N', |
706
|
|
|
'MPJUTE-', 'MPUT', 'NBUT', |
707
|
|
|
'MP(ßZ)$', 'MS', None, |
708
|
|
|
'MP(SßTZ)-', 'M', 'N', |
709
|
|
|
'MP(BDJLMNPQVW)-', 'MB', 'NB', |
710
|
|
|
'MY9^', 'MÜ', None, |
711
|
|
|
'M(ßZ)$', 'MS', None, |
712
|
|
|
'M´G7^', 'MAK', 'NAK', |
713
|
|
|
'M\'G7^', 'MAK', 'NAK', |
714
|
|
|
'M´^', 'MAK', 'NAK', |
715
|
|
|
'M\'^', 'MAK', 'NAK', |
716
|
|
|
'M', None, 'N', |
717
|
|
|
'NACH^^', 'NACH', 'NAK', |
718
|
|
|
'NADINE', 'NADIN', 'NATIN', |
719
|
|
|
'NAIV--', 'NA', 'NA', |
720
|
|
|
'NAISE$', 'NESE', 'NEZE', |
721
|
|
|
'NAUGENOMM------', 'NAU ', 'NAU ', |
722
|
|
|
'NAUSOGUT$', 'NAUSO GUT', 'NAUZU KUT', |
723
|
|
|
'NCH$', 'NSH', 'NZ', |
724
|
|
|
'NCOISE$', 'SOA', 'ZUA', |
725
|
|
|
'NCOIS$', 'SOA', 'ZUA', |
726
|
|
|
'NDAR$', 'NDA', 'NTA', |
727
|
|
|
'NDERINGEN------', 'NDE ', 'NTE ', |
728
|
|
|
'NDRO(CDKTZ)-', 'NTRO', None, |
729
|
|
|
'ND(BFGJLMNPQVW)-', 'NT', None, |
730
|
|
|
'ND(SßZ)$', 'NS', 'NZ', |
731
|
|
|
'ND\'S$', 'NS', 'NZ', |
732
|
|
|
'ND´S$', 'NS', 'NZ', |
733
|
|
|
'NEBEN^^', 'NEBN', 'NEBN', |
734
|
|
|
'NENGELERN------', 'NEN ', 'NEN ', |
735
|
|
|
'NENLERN(ET)---', 'NEN LE', 'NEN LE', |
736
|
|
|
'NENZULERNE---', 'NEN ZU LE', 'NEN ZU LE', |
737
|
|
|
'NE(LMNRST)-3^', 'NE', 'NE', |
738
|
|
|
'NEN-3', 'NE', 'NE', |
739
|
|
|
'NETTE$', 'NET', 'NET', |
740
|
|
|
'NGU^^', 'NU', 'NU', |
741
|
|
|
'NG(BDFJLMNPQRTVW)-', 'NK', 'NK', |
742
|
|
|
'NH(AUO)-$', 'NI', 'NI', |
743
|
|
|
'NICHTSAHNEN-----', 'NIX ', 'NIX ', |
744
|
|
|
'NICHTSSAGE----', 'NIX ', 'NIX ', |
745
|
|
|
'NICHTS^^', 'NIX', 'NIX', |
746
|
|
|
'NICHT^^', 'NICHT', 'NIKT', |
747
|
|
|
'NINE$', 'NIN', 'NIN', |
748
|
|
|
'NON^^', 'NON', 'NUN', |
749
|
|
|
'NOTLEIDE-----^', 'NOT ', 'NUT ', |
750
|
|
|
'NOT^^', 'NOT', 'NUT', |
751
|
|
|
'NTI(AIOU)-3', 'NZI', 'NZI', |
752
|
|
|
'NTIEL--3', 'NZI', 'NZI', |
753
|
|
|
'NT(SßZ)$', 'NS', 'NZ', |
754
|
|
|
'NT\'S$', 'NS', 'NZ', |
755
|
|
|
'NT´S$', 'NS', 'NZ', |
756
|
|
|
'NYLON', 'NEILON', 'NEILUN', |
757
|
|
|
'NY9^', 'NÜ', None, |
758
|
|
|
'NSTZUNEH---', 'NST ZU ', 'NZT ZU ', |
759
|
|
|
'NSZ-', 'NS', None, |
760
|
|
|
'NSTS$', 'NS', 'NZ', |
761
|
|
|
'NZ(BDFGKLMNPQRSTVWX)-', 'NS', None, |
762
|
|
|
'N(SßZ)$', 'NS', None, |
763
|
|
|
'OBERE-', 'OBER', None, |
764
|
|
|
'OBER^^', 'OBA', 'UBA', |
765
|
|
|
'OEU2', 'Ö', 'Ö', |
766
|
|
|
'OE<2', 'Ö', 'Ö', |
767
|
|
|
'OGL-', 'OK', None, |
768
|
|
|
'OGNIE-', 'ONI', 'UNI', |
769
|
|
|
'OGN(AEOU)-$', 'ONI', 'UNI', |
770
|
|
|
'OH(AIOÖUÜY)-', 'OH', None, |
771
|
|
|
'OIE$', 'Ö', 'Ö', |
772
|
|
|
'OIRE$', 'OA', 'UA', |
773
|
|
|
'OIR$', 'OA', 'UA', |
774
|
|
|
'OIX', 'OA', 'UA', |
775
|
|
|
'OI<3', 'EU', 'EU', |
776
|
|
|
'OKAY^$', 'OKE', 'UKE', |
777
|
|
|
'OLYN$', 'OLIN', 'ULIN', |
778
|
|
|
'OO(DLMZ)-', 'U', None, |
779
|
|
|
'OO$', 'U', None, |
780
|
|
|
'OO-', '', '', |
781
|
|
|
'ORGINAL-----', 'ORI', 'URI', |
782
|
|
|
'OTI(AÄOÖUÜ)-', 'OZI', 'UZI', |
783
|
|
|
'OUI^', 'WI', 'FI', |
784
|
|
|
'OUILLE$', 'ULIE', 'ULIE', |
785
|
|
|
'OU(DT)-^', 'AU', 'AU', |
786
|
|
|
'OUSE$', 'AUS', 'AUZ', |
787
|
|
|
'OUT-', 'AU', 'AU', |
788
|
|
|
'OU', 'U', 'U', |
789
|
|
|
'O(FV)$', 'AU', 'AU', # due to 'OW$' -> 'AU' |
790
|
|
|
'OVER--<', 'OW', None, |
791
|
|
|
'OV(AOU)-', 'OW', None, |
792
|
|
|
'OW$', 'AU', 'AU', |
793
|
|
|
'OWS$', 'OS', 'UZ', |
794
|
|
|
'OJ(AÄEIOÖUÜ)--', 'O', 'U', |
795
|
|
|
'OYER', 'OIA', None, |
796
|
|
|
'OY(AÄEIOÖUÜ)--', 'O', 'U', |
797
|
|
|
'O(JY)<', 'EU', 'EU', |
798
|
|
|
'OZ$', 'OS', None, |
799
|
|
|
'O´^', 'O', 'U', |
800
|
|
|
'O\'^', 'O', 'U', |
801
|
|
|
'O', None, 'U', |
802
|
|
|
'PATIEN--^', 'PAZI', 'PAZI', |
803
|
|
|
'PENSIO-^', 'PANSI', 'PANZI', |
804
|
|
|
'PE(LMNRST)-3^', 'PE', 'PE', |
805
|
|
|
'PFER-^', 'FE', 'FE', |
806
|
|
|
'P(FH)<', 'F', 'F', |
807
|
|
|
'PIC^$', 'PIK', 'PIK', |
808
|
|
|
'PIC$', 'PIZ', 'PIZ', |
809
|
|
|
'PIPELINE', 'PEIBLEIN', 'PEIBLEIN', |
810
|
|
|
'POLYP-', 'POLÜ', None, |
811
|
|
|
'POLY^^', 'POLI', 'PULI', |
812
|
|
|
'PORTRAIT7', 'PORTRE', 'PURTRE', |
813
|
|
|
'POWER7', 'PAUA', 'PAUA', |
814
|
|
|
'PP(FH)--<', 'B', 'B', |
815
|
|
|
'PP-', '', '', |
816
|
|
|
'PRODUZ-^', 'PRODU', 'BRUTU', |
817
|
|
|
'PRODUZI--', ' PRODU', ' BRUTU', |
818
|
|
|
'PRIX^$', 'PRI', 'PRI', |
819
|
|
|
'PS-^^', 'P', None, |
820
|
|
|
'P(SßZ)^', None, 'Z', |
821
|
|
|
'P(SßZ)$', 'BS', None, |
822
|
|
|
'PT-^', '', '', |
823
|
|
|
'PTI(AÄOÖUÜ)-3', 'BZI', 'BZI', |
824
|
|
|
'PY9^', 'PÜ', None, |
825
|
|
|
'P(AÄEIOÖRUÜY)-', 'P', 'P', |
826
|
|
|
'P(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'P', None, |
827
|
|
|
'P.^', None, 'P.', |
828
|
|
|
'P^', 'P', None, |
829
|
|
|
'P', 'B', 'B', |
830
|
|
|
'QI-', 'Z', 'Z', |
831
|
|
|
'QUARANT--', 'KARA', 'KARA', |
832
|
|
|
'QUE(LMNRST)-3', 'KWE', 'KFE', |
833
|
|
|
'QUE$', 'K', 'K', |
834
|
|
|
'QUI(NS)$', 'KI', 'KI', |
835
|
|
|
'QUIZ7', 'KWIS', None, |
836
|
|
|
'Q(UV)7', 'KW', 'KF', |
837
|
|
|
'Q<', 'K', 'K', |
838
|
|
|
'RADFAHR----', 'RAT ', 'RAT ', |
839
|
|
|
'RAEFTEZEHRE-----', 'REFTE ', 'REFTE ', |
840
|
|
|
'RCH', 'RCH', 'RK', |
841
|
|
|
'REA(DU)---3^', 'R', None, |
842
|
|
|
'REBSERZEUG------', 'REBS ', 'REBZ ', |
843
|
|
|
'RECHERCH^', 'RESHASH', 'REZAZ', |
844
|
|
|
'RECYCL--', 'RIZEI', 'RIZEI', |
845
|
|
|
'RE(ALST)-3^', 'RE', None, |
846
|
|
|
'REE$', 'RI', 'RI', |
847
|
|
|
'RER$', 'RA', 'RA', |
848
|
|
|
'RE(MNR)-4', 'RE', 'RE', |
849
|
|
|
'RETTE$', 'RET', 'RET', |
850
|
|
|
'REUZ$', 'REUZ', None, |
851
|
|
|
'REW$', 'RU', 'RU', |
852
|
|
|
'RH<^', 'R', 'R', |
853
|
|
|
'RJA(MN)--', 'RI', 'RI', |
854
|
|
|
'ROWD-^', 'RAU', 'RAU', |
855
|
|
|
'RTEMONNAIE-', 'RTMON', 'RTNUN', |
856
|
|
|
'RTI(AÄOÖUÜ)-3', 'RZI', 'RZI', |
857
|
|
|
'RTIEL--3', 'RZI', 'RZI', |
858
|
|
|
'RV(AEOU)-3', 'RW', None, |
859
|
|
|
'RY(KN)-$', 'RI', 'RI', |
860
|
|
|
'RY9^', 'RÜ', None, |
861
|
|
|
'RÄFTEZEHRE-----', 'REFTE ', 'REFTE ', |
862
|
|
|
'SAISO-^', 'SES', 'ZEZ', |
863
|
|
|
'SAFE^$', 'SEIF', 'ZEIF', |
864
|
|
|
'SAUCE-^', 'SOS', 'ZUZ', |
865
|
|
|
'SCHLAGGEBEN-----<', 'SHLAK ', 'ZLAK ', |
866
|
|
|
'SCHSCH---7', '', '', |
867
|
|
|
'SCHTSCH', 'SH', 'Z', |
868
|
|
|
'SC(HZ)<', 'SH', 'Z', |
869
|
|
|
'SC', 'SK', 'ZK', |
870
|
|
|
'SELBSTST--7^^', 'SELB', 'ZELB', |
871
|
|
|
'SELBST7^^', 'SELBST', 'ZELBZT', |
872
|
|
|
'SERVICE7^', 'SÖRWIS', 'ZÖRFIZ', |
873
|
|
|
'SERVI-^', 'SERW', None, |
874
|
|
|
'SE(LMNRST)-3^', 'SE', 'ZE', |
875
|
|
|
'SETTE$', 'SET', 'ZET', |
876
|
|
|
'SHP-^', 'S', 'Z', |
877
|
|
|
'SHST', 'SHT', 'ZT', |
878
|
|
|
'SHTSH', 'SH', 'Z', |
879
|
|
|
'SHT', 'ST', 'Z', |
880
|
|
|
'SHY9^', 'SHÜ', None, |
881
|
|
|
'SH^^', 'SH', None, |
882
|
|
|
'SH3', 'SH', 'Z', |
883
|
|
|
'SICHERGEGAN-----^', 'SICHA ', 'ZIKA ', |
884
|
|
|
'SICHERGEHE----^', 'SICHA ', 'ZIKA ', |
885
|
|
|
'SICHERGESTEL------^', 'SICHA ', 'ZIKA ', |
886
|
|
|
'SICHERSTELL-----^', 'SICHA ', 'ZIKA ', |
887
|
|
|
'SICHERZU(GS)--^', 'SICHA ZU ', 'ZIKA ZU ', |
888
|
|
|
'SIEGLI-^', 'SIKL', 'ZIKL', |
889
|
|
|
'SIGLI-^', 'SIKL', 'ZIKL', |
890
|
|
|
'SIGHT', 'SEIT', 'ZEIT', |
891
|
|
|
'SIGN', 'SEIN', 'ZEIN', |
892
|
|
|
'SKI(NPZ)-', 'SKI', 'ZKI', |
893
|
|
|
'SKI<^', 'SHI', 'ZI', |
894
|
|
|
'SODASS^$', 'SO DAS', 'ZU TAZ', |
895
|
|
|
'SODAß^$', 'SO DAS', 'ZU TAZ', |
896
|
|
|
'SOGENAN--^', 'SO GEN', 'ZU KEN', |
897
|
|
|
'SOUND-', 'SAUN', 'ZAUN', |
898
|
|
|
'STAATS^^', 'STAZ', 'ZTAZ', |
899
|
|
|
'STADT^^', 'STAT', 'ZTAT', |
900
|
|
|
'STANDE$', ' STANDE', ' ZTANTE', |
901
|
|
|
'START^^', 'START', 'ZTART', |
902
|
|
|
'STAURANT7', 'STORAN', 'ZTURAN', |
903
|
|
|
'STEAK-', 'STE', 'ZTE', |
904
|
|
|
'STEPHEN-^$', 'STEW', None, |
905
|
|
|
'STERN', 'STERN', None, |
906
|
|
|
'STRAF^^', 'STRAF', 'ZTRAF', |
907
|
|
|
'ST\'S$', 'Z', 'Z', |
908
|
|
|
'ST´S$', 'Z', 'Z', |
909
|
|
|
'STST--', '', '', |
910
|
|
|
'STS(ACEÈÉÊHIÌÍÎOUÄÜÖ)--', 'ST', 'ZT', |
911
|
|
|
'ST(SZ)', 'Z', 'Z', |
912
|
|
|
'SPAREN---^', 'SPA', 'ZPA', |
913
|
|
|
'SPAREND----', ' SPA', ' ZPA', |
914
|
|
|
'S(PTW)-^^', 'S', None, |
915
|
|
|
'SP', 'SP', None, |
916
|
|
|
'STYN(AE)-$', 'STIN', 'ZTIN', |
917
|
|
|
'ST', 'ST', 'ZT', |
918
|
|
|
'SUITE<', 'SIUT', 'ZIUT', |
919
|
|
|
'SUKE--$', 'S', 'Z', |
920
|
|
|
'SURF(EI)-', 'SÖRF', 'ZÖRF', |
921
|
|
|
'SV(AEÈÉÊIÌÍÎOU)-<^', 'SW', None, |
922
|
|
|
'SYB(IY)--^', 'SIB', None, |
923
|
|
|
'SYL(KVW)--^', 'SI', None, |
924
|
|
|
'SY9^', 'SÜ', None, |
925
|
|
|
'SZE(NPT)-^', 'ZE', 'ZE', |
926
|
|
|
'SZI(ELN)-^', 'ZI', 'ZI', |
927
|
|
|
'SZCZ<', 'SH', 'Z', |
928
|
|
|
'SZT<', 'ST', 'ZT', |
929
|
|
|
'SZ<3', 'SH', 'Z', |
930
|
|
|
'SÜL(KVW)--^', 'SI', None, |
931
|
|
|
'S', None, 'Z', |
932
|
|
|
'TCH', 'SH', 'Z', |
933
|
|
|
'TD(AÄEIOÖRUÜY)-', 'T', None, |
934
|
|
|
'TD(ÀÁÂÃÅÈÉÊËÌÍÎÏÒÓÔÕØÙÚÛÝŸ)-', 'T', None, |
935
|
|
|
'TEAT-^', 'TEA', 'TEA', |
936
|
|
|
'TERRAI7^', 'TERA', 'TERA', |
937
|
|
|
'TE(LMNRST)-3^', 'TE', 'TE', |
938
|
|
|
'TH<', 'T', 'T', |
939
|
|
|
'TICHT-', 'TIK', 'TIK', |
940
|
|
|
'TICH$', 'TIK', 'TIK', |
941
|
|
|
'TIC$', 'TIZ', 'TIZ', |
942
|
|
|
'TIGGESTELL-------', 'TIK ', 'TIK ', |
943
|
|
|
'TIGSTELL-----', 'TIK ', 'TIK ', |
944
|
|
|
'TOAS-^', 'TO', 'TU', |
945
|
|
|
'TOILET-', 'TOLE', 'TULE', |
946
|
|
|
'TOIN-', 'TOA', 'TUA', |
947
|
|
|
'TRAECHTI-^', 'TRECHT', 'TREKT', |
948
|
|
|
'TRAECHTIG--', ' TRECHT', ' TREKT', |
949
|
|
|
'TRAINI-', 'TREN', 'TREN', |
950
|
|
|
'TRÄCHTI-^', 'TRECHT', 'TREKT', |
951
|
|
|
'TRÄCHTIG--', ' TRECHT', ' TREKT', |
952
|
|
|
'TSCH', 'SH', 'Z', |
953
|
|
|
'TSH', 'SH', 'Z', |
954
|
|
|
'TST', 'ZT', 'ZT', |
955
|
|
|
'T(Sß)', 'Z', 'Z', |
956
|
|
|
'TT(SZ)--<', '', '', |
957
|
|
|
'TT9', 'T', 'T', |
958
|
|
|
'TV^$', 'TV', 'TV', |
959
|
|
|
'TX(AEIOU)-3', 'SH', 'Z', |
960
|
|
|
'TY9^', 'TÜ', None, |
961
|
|
|
'TZ-', '', '', |
962
|
|
|
'T\'S3$', 'Z', 'Z', |
963
|
|
|
'T´S3$', 'Z', 'Z', |
964
|
|
|
'UEBEL(GNRW)-^^', 'ÜBL ', 'IBL ', |
965
|
|
|
'UEBER^^', 'ÜBA', 'IBA', |
966
|
|
|
'UE2', 'Ü', 'I', |
967
|
|
|
'UGL-', 'UK', None, |
968
|
|
|
'UH(AOÖUÜY)-', 'UH', None, |
969
|
|
|
'UIE$', 'Ü', 'I', |
970
|
|
|
'UM^^', 'UM', 'UN', |
971
|
|
|
'UNTERE--3', 'UNTE', 'UNTE', |
972
|
|
|
'UNTER^^', 'UNTA', 'UNTA', |
973
|
|
|
'UNVER^^', 'UNFA', 'UNFA', |
974
|
|
|
'UN^^', 'UN', 'UN', |
975
|
|
|
'UTI(AÄOÖUÜ)-', 'UZI', 'UZI', |
976
|
|
|
'UVE-4', 'UW', None, |
977
|
|
|
'UY2', 'UI', None, |
978
|
|
|
'UZZ', 'AS', 'AZ', |
979
|
|
|
'VACL-^', 'WAZ', 'FAZ', |
980
|
|
|
'VAC$', 'WAZ', 'FAZ', |
981
|
|
|
'VAN DEN ^', 'FANDN', 'FANTN', |
982
|
|
|
'VANES-^', 'WANE', None, |
983
|
|
|
'VATRO-', 'WATR', None, |
984
|
|
|
'VA(DHJNT)--^', 'F', None, |
985
|
|
|
'VEDD-^', 'FE', 'FE', |
986
|
|
|
'VE(BEHIU)--^', 'F', None, |
987
|
|
|
'VEL(BDLMNT)-^', 'FEL', None, |
988
|
|
|
'VENTZ-^', 'FEN', None, |
989
|
|
|
'VEN(NRSZ)-^', 'FEN', None, |
990
|
|
|
'VER(AB)-^$', 'WER', None, |
991
|
|
|
'VERBAL^$', 'WERBAL', None, |
992
|
|
|
'VERBAL(EINS)-^', 'WERBAL', None, |
993
|
|
|
'VERTEBR--', 'WERTE', None, |
994
|
|
|
'VEREIN-----', 'F', None, |
995
|
|
|
'VEREN(AEIOU)-^', 'WEREN', None, |
996
|
|
|
'VERIFI', 'WERIFI', None, |
997
|
|
|
'VERON(AEIOU)-^', 'WERON', None, |
998
|
|
|
'VERSEN^', 'FERSN', 'FAZN', |
999
|
|
|
'VERSIERT--^', 'WERSI', None, |
1000
|
|
|
'VERSIO--^', 'WERS', None, |
1001
|
|
|
'VERSUS', 'WERSUS', None, |
1002
|
|
|
'VERTI(GK)-', 'WERTI', None, |
1003
|
|
|
'VER^^', 'FER', 'FA', |
1004
|
|
|
'VERSPRECHE-------', ' FER', ' FA', |
1005
|
|
|
'VER$', 'WA', None, |
1006
|
|
|
'VER', 'FA', 'FA', |
1007
|
|
|
'VET(HT)-^', 'FET', 'FET', |
1008
|
|
|
'VETTE$', 'WET', 'FET', |
1009
|
|
|
'VE^', 'WE', None, |
1010
|
|
|
'VIC$', 'WIZ', 'FIZ', |
1011
|
|
|
'VIELSAGE----', 'FIL ', 'FIL ', |
1012
|
|
|
'VIEL', 'FIL', 'FIL', |
1013
|
|
|
'VIEW', 'WIU', 'FIU', |
1014
|
|
|
'VILL(AE)-', 'WIL', None, |
1015
|
|
|
'VIS(ACEIKUVWZ)-<^', 'WIS', None, |
1016
|
|
|
'VI(ELS)--^', 'F', None, |
1017
|
|
|
'VILLON--', 'WILI', 'FILI', |
1018
|
|
|
'VIZE^^', 'FIZE', 'FIZE', |
1019
|
|
|
'VLIE--^', 'FL', None, |
1020
|
|
|
'VL(AEIOU)--', 'W', None, |
1021
|
|
|
'VOKA-^', 'WOK', None, |
1022
|
|
|
'VOL(ATUVW)--^', 'WO', None, |
1023
|
|
|
'VOR^^', 'FOR', 'FUR', |
1024
|
|
|
'VR(AEIOU)--', 'W', None, |
1025
|
|
|
'VV9', 'W', None, |
1026
|
|
|
'VY9^', 'WÜ', 'FI', |
1027
|
|
|
'V(ÜY)-', 'W', None, |
1028
|
|
|
'V(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'W', None, |
1029
|
|
|
'V(AEIJLRU)-<', 'W', None, |
1030
|
|
|
'V.^', 'V.', None, |
1031
|
|
|
'V<', 'F', 'F', |
1032
|
|
|
'WEITERENTWI-----^', 'WEITA ', 'FEITA ', |
1033
|
|
|
'WEITREICH-----^', 'WEIT ', 'FEIT ', |
1034
|
|
|
'WEITVER^', 'WEIT FER', 'FEIT FA', |
1035
|
|
|
'WE(LMNRST)-3^', 'WE', 'FE', |
1036
|
|
|
'WER(DST)-', 'WER', None, |
1037
|
|
|
'WIC$', 'WIZ', 'FIZ', |
1038
|
|
|
'WIEDERU--', 'WIDE', 'FITE', |
1039
|
|
|
'WIEDER^$', 'WIDA', 'FITA', |
1040
|
|
|
'WIEDER^^', 'WIDA ', 'FITA ', |
1041
|
|
|
'WIEVIEL', 'WI FIL', 'FI FIL', |
1042
|
|
|
'WISUEL', 'WISUEL', None, |
1043
|
|
|
'WR-^', 'W', None, |
1044
|
|
|
'WY9^', 'WÜ', 'FI', |
1045
|
|
|
'W(BDFGJKLMNPQRSTZ)-', 'F', None, |
1046
|
|
|
'W$', 'F', None, |
1047
|
|
|
'W', None, 'F', |
1048
|
|
|
'X<^', 'Z', 'Z', |
1049
|
|
|
'XHAVEN$', 'XAFN', None, |
1050
|
|
|
'X(CSZ)', 'X', 'X', |
1051
|
|
|
'XTS(CH)--', 'XT', 'XT', |
1052
|
|
|
'XT(SZ)', 'Z', 'Z', |
1053
|
|
|
'YE(LMNRST)-3^', 'IE', 'IE', |
1054
|
|
|
'YE-3', 'I', 'I', |
1055
|
|
|
'YOR(GK)^$', 'IÖRK', 'IÖRK', |
1056
|
|
|
'Y(AOU)-<7', 'I', 'I', |
1057
|
|
|
'Y(BKLMNPRSTX)-1', 'Ü', None, |
1058
|
|
|
'YVES^$', 'IF', 'IF', |
1059
|
|
|
'YVONNE^$', 'IWON', 'IFUN', |
1060
|
|
|
'Y.^', 'Y.', None, |
1061
|
|
|
'Y', 'I', 'I', |
1062
|
|
|
'ZC(AOU)-', 'SK', 'ZK', |
1063
|
|
|
'ZE(LMNRST)-3^', 'ZE', 'ZE', |
1064
|
|
|
'ZIEJ$', 'ZI', 'ZI', |
1065
|
|
|
'ZIGERJA(HR)-3', 'ZIGA IA', 'ZIKA IA', |
1066
|
|
|
'ZL(AEIOU)-', 'SL', None, |
1067
|
|
|
'ZS(CHT)--', '', '', |
1068
|
|
|
'ZS', 'SH', 'Z', |
1069
|
|
|
'ZUERST', 'ZUERST', 'ZUERST', |
1070
|
|
|
'ZUGRUNDE^$', 'ZU GRUNDE', 'ZU KRUNTE', |
1071
|
|
|
'ZUGRUNDE', 'ZU GRUNDE ', 'ZU KRUNTE ', |
1072
|
|
|
'ZUGUNSTEN', 'ZU GUNSTN', 'ZU KUNZTN', |
1073
|
|
|
'ZUHAUSE-', 'ZU HAUS', 'ZU AUZ', |
1074
|
|
|
'ZULASTEN^$', 'ZU LASTN', 'ZU LAZTN', |
1075
|
|
|
'ZURUECK^^', 'ZURÜK', 'ZURIK', |
1076
|
|
|
'ZURZEIT', 'ZUR ZEIT', 'ZUR ZEIT', |
1077
|
|
|
'ZURÜCK^^', 'ZURÜK', 'ZURIK', |
1078
|
|
|
'ZUSTANDE', 'ZU STANDE', 'ZU ZTANTE', |
1079
|
|
|
'ZUTAGE', 'ZU TAGE', 'ZU TAKE', |
1080
|
|
|
'ZUVER^^', 'ZUFA', 'ZUFA', |
1081
|
|
|
'ZUVIEL', 'ZU FIL', 'ZU FIL', |
1082
|
|
|
'ZUWENIG', 'ZU WENIK', 'ZU FENIK', |
1083
|
|
|
'ZY9^', 'ZÜ', None, |
1084
|
|
|
'ZYK3$', 'ZIK', None, |
1085
|
|
|
'Z(VW)7^', 'SW', None, |
1086
|
|
|
None, None, None |
1087
|
|
|
# fmt: on |
1088
|
|
|
) |
1089
|
|
|
|
1090
|
1 |
|
phonet_hash = Counter() |
1091
|
1 |
|
alpha_pos = Counter() |
1092
|
|
|
|
1093
|
1 |
|
phonet_hash_1 = Counter() |
1094
|
1 |
|
phonet_hash_2 = Counter() |
1095
|
|
|
|
1096
|
1 |
|
_phonet_upper_translation = dict( |
1097
|
|
|
zip( |
1098
|
|
|
( |
1099
|
|
|
ord(_) |
|
|
|
|
1100
|
|
|
for _ in 'abcdefghijklmnopqrstuvwxyzàáâãåäæ' |
1101
|
|
|
+ 'çðèéêëìíîïñòóôõöøœšßþùúûüýÿ' |
1102
|
|
|
), |
1103
|
|
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÅÄÆ' |
1104
|
|
|
+ 'ÇÐÈÉÊËÌÍÎÏÑÒÓÔÕÖØŒŠßÞÙÚÛÜÝŸ', |
1105
|
|
|
) |
1106
|
|
|
) |
1107
|
|
|
|
1108
|
1 |
|
def _initialize_phonet(lang):
    """Build the lookup tables that index the phonet rule table.

    Populates ``alpha_pos`` (character -> letter class), ``phonet_hash``
    (first character of a pattern -> index of a rule starting with it) and
    ``phonet_hash_1`` / ``phonet_hash_2`` (first and most recent rule index
    for a pair of leading characters).

    :param str lang: ``'none'`` selects ``_phonet_rules_no_lang``; any other
        value selects ``_phonet_rules_german``
    """
    rules = _phonet_rules_no_lang if lang == 'none' else _phonet_rules_german

    phonet_hash[''] = -1

    # German and international umlauts all share letter class 1
    for letter in 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßŒŠŸ':
        alpha_pos[letter] = 1
        phonet_hash[letter] = -1

    # "normal" letters ('A'-'Z') get letter classes 2 through 27
    for letter_class, letter in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 2):
        alpha_pos[letter] = letter_class
        phonet_hash[letter] = -1

    # mark every (plain letter, letter class) cell as "no rule yet"
    for row in range(26):
        for col in range(28):
            cell = (row, col)
            phonet_hash_1[cell] = -1
            phonet_hash_2[cell] = -1

    # for each phonetic rule: a rule occupies three consecutive slots, the
    # pattern followed by its two replacement entries
    for idx in range(0, len(rules), 3):
        pattern = rules[idx]
        if not pattern:
            continue

        head = pattern[0]

        # first hash value: the first rule for this character that has at
        # least one replacement
        # NOTE(review): phonet_hash is a Counter, so a character that was
        # never assigned -1 above reads as 0 here and never passes this
        # test -- confirm that is intended for non-letter pattern heads
        if phonet_hash[head] < 0 and (rules[idx + 1] or rules[idx + 2]):
            phonet_hash[head] = idx

        # second hash values are only kept for the plain letters 'A'-'Z'
        if not (head and alpha_pos[head] >= 2):
            continue

        row = alpha_pos[head] - 2
        tail = pattern[1:]

        # characters that may follow the head: a blank when the pattern
        # is a single character, the members of a leading "(...)" group, or
        # just the second character of the pattern
        if not tail:
            followers = ' '
        elif tail[0] == '(':
            followers = tail[1:]
        else:
            followers = tail[0]

        # only the part before the first ')' is relevant
        for follower in followers.partition(')')[0]:
            col = alpha_pos[follower]

            if col > 0:
                # add hash value for this letter
                cell = (row, col)
                if phonet_hash_1[cell] < 0:
                    phonet_hash_1[cell] = idx
                    phonet_hash_2[cell] = idx

                if phonet_hash_2[cell] >= idx - 30:
                    phonet_hash_2[cell] = idx
                else:
                    # previous hit is more than 30 slots back: fall
                    # through to the "all letters" bucket instead
                    col = -1

            if col <= 0:
                # add hash value for all letters
                if phonet_hash_1[row, 0] < 0:
                    phonet_hash_1[row, 0] = idx

                phonet_hash_2[row, 0] = idx
1216
|
|
|
|
1217
|
1 |
|
def _phonet(term, mode, lang):
    """Return the phonet coded form of a term.

    The term is upper-cased with ``_phonet_upper_translation`` and scanned
    from left to right. At each position the rule table selected by
    ``lang`` is searched (using the ``alpha_pos`` / ``phonet_hash*`` lookup
    tables) for a pattern that matches; the replacement stored ``mode``
    slots after the pattern is then either written to the output or, for
    patterns containing ``<``, written back into the input and re-scanned.

    :param str term: the term to encode
    :param int mode: offset from a rule's pattern slot to the replacement
        slot to use (``_phonet_rules[pos + mode]``)
    :param str lang: ``'none'`` selects ``_phonet_rules_no_lang``; any other
        value selects ``_phonet_rules_german``
    :returns: the encoded term, or ``''`` if ``term`` is empty
    :rtype: str
    """
    if lang == 'none':
        _phonet_rules = _phonet_rules_no_lang
    else:
        _phonet_rules = _phonet_rules_german

    char0 = ''
    # dest starts as a copy of the input and is overwritten position by
    # position; only dest[0:j] is returned
    dest = term

    if not term:
        return ''

    term_length = len(term)

    # convert input string to upper-case
    src = term.translate(_phonet_upper_translation)

    # check "src"
    i = 0  # read position in src
    j = 0  # write position in dest
    zeta = 0  # non-zero right after a '<' rule rewrote src

    while i < len(src):
        char = src[i]

        pos = alpha_pos[char]

        if pos >= 2:
            # plain letter: look up the rule range via the two-character
            # hash, keyed by this letter and the class of the next character
            xpos = pos - 2

            if i + 1 == len(src):
                pos = alpha_pos['']
            else:
                pos = alpha_pos[src[i + 1]]

            start1 = phonet_hash_1[xpos, pos]
            start2 = phonet_hash_1[xpos, 0]
            end1 = phonet_hash_2[xpos, pos]
            end2 = phonet_hash_2[xpos, 0]

            # preserve rule priorities
            # (swap so that range 1 is the one that starts earlier)
            if (start2 >= 0) and ((start1 < 0) or (start2 < start1)):
                pos = start1
                start1 = start2
                start2 = pos
                pos = end1
                end1 = end2
                end2 = pos

            # merge the two ranges into one when range 2 starts inside
            # range 1
            if (end1 >= start2) and (start2 >= 0):
                if end2 > end1:
                    end1 = end2

                start2 = -1
                end2 = -1
        else:
            # any other character: single-character hash, open-ended range
            pos = phonet_hash[char]
            start1 = pos
            end1 = 10000
            start2 = -1
            end2 = -1

        pos = start1
        # zeta0 is set to 1 when a rule has already repositioned i/src, so
        # the default "copy char and advance" step below must be skipped
        zeta0 = 0

        if pos >= 0:
            # check rules for this char
            while (_phonet_rules[pos] is None) or (
                _phonet_rules[pos][0] == char
            ):
                if pos > end1:
                    # first range exhausted: switch to the second range if
                    # there is one, otherwise stop searching
                    if start2 > 0:
                        pos = start2
                        start1 = start2
                        start2 = -1
                        end1 = end2
                        end2 = -1
                        continue

                    break

                if (_phonet_rules[pos] is None) or (
                    _phonet_rules[pos + mode] is None
                ):
                    # no conversion rule available
                    pos += 3
                    continue

                # check whole string
                matches = 1  # number of matching letters
                priority = 5  # default priority
                rule = _phonet_rules[pos]
                rule = rule[1:]

                # consume literal pattern characters that equal the input
                # NOTE(review): the last clause tests whether the whole
                # remaining pattern is a substring of '(-<^$', not whether
                # rule[0] is one of those characters -- confirm intended
                while (
                    rule
                    and (len(src) > (i + matches))
                    and (src[i + matches] == rule[0])
                    and not rule[0].isdigit()
                    and (rule not in '(-<^$')
                ):
                    matches += 1
                    rule = rule[1:]

                if rule and (rule[0] == '('):
                    # check an array of letters
                    if (
                        (len(src) > (i + matches))
                        and src[i + matches].isalpha()
                        and (src[i + matches] in rule[1:])
                    ):
                        matches += 1

                        while rule and rule[0] != ')':
                            rule = rule[1:]

                        # drop the closing ')' (a no-op if rule is empty)
                        rule = rule[1:]

                # remember the first control character after the matched
                # part; a '-' here suppresses the continuation check below
                if rule:
                    priority0 = ord(rule[0])
                else:
                    priority0 = 0

                matches0 = matches

                # each '-' gives one matched character back to the input
                while rule and rule[0] == '-' and matches > 1:
                    matches -= 1
                    rule = rule[1:]

                if rule and rule[0] == '<':
                    rule = rule[1:]

                if rule and rule[0].isdigit():
                    # read priority
                    priority = int(rule[0])
                    rule = rule[1:]

                # '^^' is evaluated like a single '^' here; its extra effect
                # is applied after the replacement (see below)
                if rule and rule[0:2] == '^^':
                    rule = rule[1:]

                # The rule applies if the pattern is used up, or it ends in
                # '^' and the match is not preceded by a letter (with '^$'
                # additionally: not followed by a letter or '.'), or it ends
                # in '$' and the match is preceded by a letter and followed
                # by neither a letter nor '.'
                if (
                    not rule
                    or (
                        (rule[0] == '^')
                        and ((i == 0) or not src[i - 1].isalpha())
                        and (
                            (rule[1:2] != '$')
                            or (
                                not (
                                    src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha()
                                )
                                and (
                                    src[i + matches0 : i + matches0 + 1]
                                    != '.'
                                )
                            )
                        )
                    )
                    or (
                        (rule[0] == '$')
                        and (i > 0)
                        and src[i - 1].isalpha()
                        and (
                            (
                                not src[
                                    i + matches0 : i + matches0 + 1
                                ].isalpha()
                            )
                            and (
                                src[i + matches0 : i + matches0 + 1] != '.'
                            )
                        )
                    )
                ):
                    # look for continuation, if:
                    # matches > 1 and NO '-' in first string
                    pos0 = -1

                    start3 = 0
                    start4 = 0
                    end3 = 0
                    end4 = 0

                    if (
                        (matches > 1)
                        and src[i + matches : i + matches + 1]
                        and (priority0 != ord('-'))
                    ):
                        # same range lookup as above, but for the last
                        # character of the current match
                        char0 = src[i + matches - 1]
                        pos0 = alpha_pos[char0]

                        if pos0 >= 2 and src[i + matches]:
                            xpos = pos0 - 2
                            pos0 = alpha_pos[src[i + matches]]
                            start3 = phonet_hash_1[xpos, pos0]
                            start4 = phonet_hash_1[xpos, 0]
                            end3 = phonet_hash_2[xpos, pos0]
                            end4 = phonet_hash_2[xpos, 0]

                            # preserve rule priorities
                            if (start4 >= 0) and (
                                (start3 < 0) or (start4 < start3)
                            ):
                                pos0 = start3
                                start3 = start4
                                start4 = pos0
                                pos0 = end3
                                end3 = end4
                                end4 = pos0

                            if (end3 >= start4) and (start4 >= 0):
                                if end4 > end3:
                                    end3 = end4

                                start4 = -1
                                end4 = -1
                        else:
                            pos0 = phonet_hash[char0]
                            start3 = pos0
                            end3 = 10000
                            start4 = -1
                            end4 = -1

                        pos0 = start3

                    # check continuation rules for src[i+matches]
                    if pos0 >= 0:
                        while (_phonet_rules[pos0] is None) or (
                            _phonet_rules[pos0][0] == char0
                        ):
                            if pos0 > end3:
                                if start4 > 0:
                                    pos0 = start4
                                    start3 = start4
                                    start4 = -1
                                    end3 = end4
                                    end4 = -1
                                    continue

                                # no continuation rule found; -1 makes the
                                # priority test after the loop fail
                                priority0 = -1

                                # important
                                break

                            if (_phonet_rules[pos0] is None) or (
                                _phonet_rules[pos0 + mode] is None
                            ):
                                # no conversion rule available
                                pos0 += 3
                                continue

                            # check whole string
                            matches0 = matches
                            priority0 = 5
                            rule = _phonet_rules[pos0]
                            rule = rule[1:]

                            # NOTE(review): this condition differs from the
                            # matching loop for the primary rule above
                            # ("not digit OR rule in ..." vs. "not digit AND
                            # rule not in ...") and also tests the whole
                            # remaining pattern as a substring -- confirm
                            while (
                                rule
                                and (
                                    src[i + matches0 : i + matches0 + 1]
                                    == rule[0]
                                )
                                and (
                                    not rule[0].isdigit()
                                    or (rule in '(-<^$')
                                )
                            ):
                                matches0 += 1
                                rule = rule[1:]

                            if rule and rule[0] == '(':
                                # check an array of letters
                                if src[
                                    i + matches0 : i + matches0 + 1
                                ].isalpha() and (
                                    src[i + matches0] in rule[1:]
                                ):
                                    matches0 += 1

                                    while rule and rule[0] != ')':
                                        rule = rule[1:]

                                    # drop the closing ')' (a no-op if rule
                                    # is empty)
                                    rule = rule[1:]

                            while rule and rule[0] == '-':
                                # "matches0" is NOT decremented
                                # because of "if (matches0 == matches)"
                                rule = rule[1:]

                            if rule and rule[0] == '<':
                                rule = rule[1:]

                            if rule and rule[0].isdigit():
                                priority0 = int(rule[0])
                                rule = rule[1:]

                            if (
                                not rule
                                or
                                # rule == '^' is not possible here
                                (
                                    (rule[0] == '$')
                                    and not src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha()
                                    and (
                                        src[
                                            i + matches0 : i + matches0 + 1
                                        ]
                                        != '.'
                                    )
                                )
                            ):
                                if matches0 == matches:
                                    # this is only a partial string
                                    pos0 += 3
                                    continue

                                if priority0 < priority:
                                    # priority is too low
                                    pos0 += 3
                                    continue

                                # continuation rule found
                                break

                            pos0 += 3

                        # end of "while"
                        # a continuation rule of at least equal priority
                        # exists: discard the current rule and try the next
                        if (priority0 >= priority) and (
                            (_phonet_rules[pos0] is not None)
                            and (_phonet_rules[pos0][0] == char0)
                        ):

                            pos += 3
                            continue

                    # replace string
                    # priority0 is reused as a flag: 1 if the pattern
                    # contains '<', else 0
                    if _phonet_rules[pos] and (
                        '<' in _phonet_rules[pos][1:]
                    ):
                        priority0 = 1
                    else:
                        priority0 = 0

                    rule = _phonet_rules[pos + mode]

                    if (priority0 == 1) and (zeta == 0):
                        # rule with '<' is applied
                        # (the replacement is written into src and scanned
                        # again from position i)
                        if (
                            (j > 0)
                            and rule
                            and (
                                (dest[j - 1] == char)
                                or (dest[j - 1] == rule[0])
                            )
                        ):
                            j -= 1

                        zeta0 = 1
                        zeta += 1
                        matches0 = 0

                        # overwrite src in place with the replacement
                        # NOTE(review): src[i + matches0] is a plain index
                        # and raises IndexError if i + matches0 reaches
                        # len(src) while rule is non-empty -- confirm that
                        # no rule can trigger this
                        while rule and src[i + matches0]:
                            src = (
                                src[0 : i + matches0]
                                + rule[0]
                                + src[i + matches0 + 1 :]
                            )
                            matches0 += 1
                            rule = rule[1:]

                        # replacement shorter than the match: cut the rest
                        if matches0 < matches:
                            src = (
                                src[0 : i + matches0] + src[i + matches :]
                            )

                        char = src[i]
                    else:
                        # ordinary rule: skip the matched input and emit the
                        # replacement, dropping immediate duplicates
                        i = i + matches - 1
                        zeta = 0

                        while len(rule) > 1:
                            if (j == 0) or (dest[j - 1] != rule[0]):
                                dest = (
                                    dest[0:j]
                                    + rule[0]
                                    + dest[min(len(dest), j + 1) :]
                                )
                                j += 1

                            rule = rule[1:]

                        # new "current char"
                        # (the last replacement character is written by the
                        # common code after the rule loop)
                        if not rule:
                            rule = ''
                            char = ''
                        else:
                            char = rule[0]

                        # '^^' rule: emit the current char immediately and
                        # restart the scan on the remainder of src at
                        # position 0
                        if (
                            _phonet_rules[pos]
                            and '^^' in _phonet_rules[pos][1:]
                        ):
                            if char:
                                dest = (
                                    dest[0:j]
                                    + char
                                    + dest[min(len(dest), j + 1) :]
                                )
                                j += 1

                            src = src[i + 1 :]
                            i = 0
                            zeta0 = 1

                    break

                pos += 3

                if pos > end1 and start2 > 0:
                    pos = start2
                    start1 = start2
                    end1 = end2
                    start2 = -1
                    end2 = -1

        if zeta0 == 0:
            if char and ((j == 0) or (dest[j - 1] != char)):
                # delete multiple letters only
                # (char is written only if it differs from the previous
                # output character)
                dest = dest[0:j] + char + dest[min(j + 1, term_length) :]
                j += 1

            i += 1
            zeta = 0

    # discard whatever remains of the original term beyond the write position
    dest = dest[0:j]

    return dest
1662
|
|
|
|
1663
|
1 |
|
_initialize_phonet(lang) |
1664
|
|
|
|
1665
|
1 |
|
word = unicode_normalize('NFKC', text_type(word)) |
1666
|
1 |
|
return _phonet(word, mode, lang) |
1667
|
|
|
|
1668
|
|
|
|
1669
|
|
|
if __name__ == '__main__':
    # Executed directly as a script: run the doctests found in this module.
    from doctest import testmod

    testmod()
1673
|
|
|
|