abydos.phonetic._phonet.Phonet.encode() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.phonetic._phonet.Phonet.encode() F

↳ Parent: abydos.phonetic._phonet

Complexity

Conditions

142

Size

Total Lines	648
Code Lines	381

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	269
CRAP Score	142

Importance

Changes

Metric	Value
cc	142
eloc	381
nop	4
dl	0
loc	648
ccs	269
cts	269
cp	1
crap	142
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

1		# -- coding: utf-8 --
		0 ignored issues – show coding-style introduced 2018-11-14 09:24 UTC by Report Bug Copy Issue Report Too many lines in module (1780/1000) Loading history...
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._phonet.
20
21		phonet algorithm (a.k.a. Hannoveraner Phonetik), intended chiefly for German
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from collections import Counter
32	1	from unicodedata import normalize as unicode_normalize
33
34	1	from six import text_type
35	1	from six.moves import range
36
37	1	from ._phonetic import _Phonetic
38
39	1	__all__ = ['Phonet', 'phonet']
40
41
42	1	class Phonet(_Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
43		"""Phonet code.
44
45		phonet ("Hannoveraner Phonetik") was developed by Jörg Michael and
46		documented in :cite:`Michael:1999`.
47
48		This is a port of Jesper Zedlitz's code, which is licensed LGPL
49		:cite:`Zedlitz:2015`.
50
51		That is, in turn, based on Michael's C code, which is also licensed LGPL
52		:cite:`Michael:2007`.
53		"""
54
55	1	_rules_no_lang = ( # separator chars
56		# fmt: off
57		'´', ' ', ' ',
58		'"', ' ', ' ',
59		'`$', '', '',
60		'\'', ' ', ' ',
61		',', ',', ',',
62		';', ',', ',',
63		'-', ' ', ' ',
64		' ', ' ', ' ',
65		'.', '.', '.',
66		':', '.', '.',
67		# German umlauts
68		'Ä', 'AE', 'AE',
69		'Ö', 'OE', 'OE',
70		'Ü', 'UE', 'UE',
71		'ß', 'S', 'S',
72		# international umlauts
73		'À', 'A', 'A',
74		'Á', 'A', 'A',
75		'Â', 'A', 'A',
76		'Ã', 'A', 'A',
77		'Å', 'A', 'A',
78		'Æ', 'AE', 'AE',
79		'Ç', 'C', 'C',
80		'Ð', 'DJ', 'DJ',
81		'È', 'E', 'E',
82		'É', 'E', 'E',
83		'Ê', 'E', 'E',
84		'Ë', 'E', 'E',
85		'Ì', 'I', 'I',
86		'Í', 'I', 'I',
87		'Î', 'I', 'I',
88		'Ï', 'I', 'I',
89		'Ñ', 'NH', 'NH',
90		'Ò', 'O', 'O',
91		'Ó', 'O', 'O',
92		'Ô', 'O', 'O',
93		'Õ', 'O', 'O',
94		'Œ', 'OE', 'OE',
95		'Ø', 'OE', 'OE',
96		'Š', 'SH', 'SH',
97		'Þ', 'TH', 'TH',
98		'Ù', 'U', 'U',
99		'Ú', 'U', 'U',
100		'Û', 'U', 'U',
101		'Ý', 'Y', 'Y',
102		'Ÿ', 'Y', 'Y',
103		# 'normal' letters (A-Z)
104		'MC^', 'MAC', 'MAC',
105		'MC^', 'MAC', 'MAC',
106		'M´^', 'MAC', 'MAC',
107		'M\'^', 'MAC', 'MAC',
108		'O´^', 'O', 'O',
109		'O\'^', 'O', 'O',
110		'VAN DEN ^', 'VANDEN', 'VANDEN',
111		None, None, None
112		# fmt: on
113		)
114
115	1	_rules_german = ( # separator chars
116		# fmt: off
117		'´', ' ', ' ',
118		'"', ' ', ' ',
119		'`$', '', '',
120		'\'', ' ', ' ',
121		',', ' ', ' ',
122		';', ' ', ' ',
123		'-', ' ', ' ',
124		' ', ' ', ' ',
125		'.', '.', '.',
126		':', '.', '.',
127		# German umlauts
128		'ÄE', 'E', 'E',
129		'ÄU<', 'EU', 'EU',
130		'ÄV(AEOU)-<', 'EW', None,
131		'Ä$', 'Ä', None,
132		'Ä<', None, 'E',
133		'Ä', 'E', None,
134		'ÖE', 'Ö', 'Ö',
135		'ÖU', 'Ö', 'Ö',
136		'ÖVER--<', 'ÖW', None,
137		'ÖV(AOU)-', 'ÖW', None,
138		'ÜBEL(GNRW)-^^', 'ÜBL ', 'IBL ',
139		'ÜBER^^', 'ÜBA', 'IBA',
140		'ÜE', 'Ü', 'I',
141		'ÜVER--<', 'ÜW', None,
142		'ÜV(AOU)-', 'ÜW', None,
143		'Ü', None, 'I',
144		'ßCH<', None, 'Z',
145		'ß<', 'S', 'Z',
146		# international umlauts
147		'À<', 'A', 'A',
148		'Á<', 'A', 'A',
149		'Â<', 'A', 'A',
150		'Ã<', 'A', 'A',
151		'Å<', 'A', 'A',
152		'ÆER-', 'E', 'E',
153		'ÆU<', 'EU', 'EU',
154		'ÆV(AEOU)-<', 'EW', None,
155		'Æ$', 'Ä', None,
156		'Æ<', None, 'E',
157		'Æ', 'E', None,
158		'Ç', 'Z', 'Z',
159		'ÐÐ-', '', '',
160		'Ð', 'DI', 'TI',
161		'È<', 'E', 'E',
162		'É<', 'E', 'E',
163		'Ê<', 'E', 'E',
164		'Ë', 'E', 'E',
165		'Ì<', 'I', 'I',
166		'Í<', 'I', 'I',
167		'Î<', 'I', 'I',
168		'Ï', 'I', 'I',
169		'ÑÑ-', '', '',
170		'Ñ', 'NI', 'NI',
171		'Ò<', 'O', 'U',
172		'Ó<', 'O', 'U',
173		'Ô<', 'O', 'U',
174		'Õ<', 'O', 'U',
175		'Œ<', 'Ö', 'Ö',
176		'Ø(IJY)-<', 'E', 'E',
177		'Ø<', 'Ö', 'Ö',
178		'Š', 'SH', 'Z',
179		'Þ', 'T', 'T',
180		'Ù<', 'U', 'U',
181		'Ú<', 'U', 'U',
182		'Û<', 'U', 'U',
183		'Ý<', 'I', 'I',
184		'Ÿ<', 'I', 'I',
185		# 'normal' letters (A-Z)
186		'ABELLE$', 'ABL', 'ABL',
187		'ABELL$', 'ABL', 'ABL',
188		'ABIENNE$', 'ABIN', 'ABIN',
189		'ACHME---^', 'ACH', 'AK',
190		'ACEY$', 'AZI', 'AZI',
191		'ADV', 'ATW', None,
192		'AEGL-', 'EK', None,
193		'AEU<', 'EU', 'EU',
194		'AE2', 'E', 'E',
195		'AFTRAUBEN------', 'AFT ', 'AFT ',
196		'AGL-1', 'AK', None,
197		'AGNI-^', 'AKN', 'AKN',
198		'AGNIE-', 'ANI', 'ANI',
199		'AGN(AEOU)-$', 'ANI', 'ANI',
200		'AH(AIOÖUÜY)-', 'AH', None,
201		'AIA2', 'AIA', 'AIA',
202		'AIE$', 'E', 'E',
203		'AILL(EOU)-', 'ALI', 'ALI',
204		'AINE$', 'EN', 'EN',
205		'AIRE$', 'ER', 'ER',
206		'AIR-', 'E', 'E',
207		'AISE$', 'ES', 'EZ',
208		'AISSANCE$', 'ESANS', 'EZANZ',
209		'AISSE$', 'ES', 'EZ',
210		'AIX$', 'EX', 'EX',
211		'AJ(AÄEÈÉÊIOÖUÜ)--', 'A', 'A',
212		'AKTIE', 'AXIE', 'AXIE',
213		'AKTUEL', 'AKTUEL', None,
214		'ALOI^', 'ALOI', 'ALUI', # Don't merge these rules
215		'ALOY^', 'ALOI', 'ALUI', # needed by 'check_rules'
216		'AMATEU(RS)-', 'AMATÖ', 'ANATÖ',
217		'ANCH(OEI)-', 'ANSH', 'ANZ',
218		'ANDERGEGANG----', 'ANDA GE', 'ANTA KE',
219		'ANDERGEHE----', 'ANDA ', 'ANTA ',
220		'ANDERGESETZ----', 'ANDA GE', 'ANTA KE',
221		'ANDERGING----', 'ANDA ', 'ANTA ',
222		'ANDERSETZ(ET)-----', 'ANDA ', 'ANTA ',
223		'ANDERZUGEHE----', 'ANDA ZU ', 'ANTA ZU ',
224		'ANDERZUSETZE-----', 'ANDA ZU ', 'ANTA ZU ',
225		'ANER(BKO)---^^', 'AN', None,
226		'ANHAND---^$', 'AN H', 'AN ',
227		'ANH(AÄEIOÖUÜY)--^^', 'AN', None,
228		'ANIELLE$', 'ANIEL', 'ANIL',
229		'ANIEL', 'ANIEL', None,
230		'ANSTELLE----^$', 'AN ST', 'AN ZT',
231		'ANTI^^', 'ANTI', 'ANTI',
232		'ANVER^^', 'ANFA', 'ANFA',
233		'ATIA$', 'ATIA', 'ATIA',
234		'ATIA(NS)--', 'ATI', 'ATI',
235		'ATI(AÄOÖUÜ)-', 'AZI', 'AZI',
236		'AUAU--', '', '',
237		'AUERE$', 'AUERE', None,
238		'AUERE(NS)-$', 'AUERE', None,
239		'AUERE(AIOUY)--', 'AUER', None,
240		'AUER(AÄIOÖUÜY)-', 'AUER', None,
241		'AUER<', 'AUA', 'AUA',
242		'AUF^^', 'AUF', 'AUF',
243		'AULT$', 'O', 'U',
244		'AUR(BCDFGKLMNQSTVWZ)-', 'AUA', 'AUA',
245		'AUR$', 'AUA', 'AUA',
246		'AUSSE$', 'OS', 'UZ',
247		'AUS(ST)-^', 'AUS', 'AUS',
248		'AUS^^', 'AUS', 'AUS',
249		'AUTOFAHR----', 'AUTO ', 'AUTU ',
250		'AUTO^^', 'AUTO', 'AUTU',
251		'AUX(IY)-', 'AUX', 'AUX',
252		'AUX', 'O', 'U',
253		'AU', 'AU', 'AU',
254		'AVER--<', 'AW', None,
255		'AVIER$', 'AWIE', 'AFIE',
256		'AV(EÈÉÊI)-^', 'AW', None,
257		'AV(AOU)-', 'AW', None,
258		'AYRE$', 'EIRE', 'EIRE',
259		'AYRE(NS)-$', 'EIRE', 'EIRE',
260		'AYRE(AIOUY)--', 'EIR', 'EIR',
261		'AYR(AÄIOÖUÜY)-', 'EIR', 'EIR',
262		'AYR<', 'EIA', 'EIA',
263		'AYER--<', 'EI', 'EI',
264		'AY(AÄEIOÖUÜY)--', 'A', 'A',
265		'AË', 'E', 'E',
266		'A(IJY)<', 'EI', 'EI',
267		'BABY^$', 'BEBI', 'BEBI',
268		'BAB(IY)^', 'BEBI', 'BEBI',
269		'BEAU^$', 'BO', None,
270		'BEA(BCMNRU)-^', 'BEA', 'BEA',
271		'BEAT(AEIMORU)-^', 'BEAT', 'BEAT',
272		'BEE$', 'BI', 'BI',
273		'BEIGE^$', 'BESH', 'BEZ',
274		'BENOIT--', 'BENO', 'BENU',
275		'BER(DT)-', 'BER', None,
276		'BERN(DT)-', 'BERN', None,
277		'BE(LMNRST)-^', 'BE', 'BE',
278		'BETTE$', 'BET', 'BET',
279		'BEVOR^$', 'BEFOR', None,
280		'BIC$', 'BIZ', 'BIZ',
281		'BOWL(EI)-', 'BOL', 'BUL',
282		'BP(AÄEÈÉÊIÌÍÎOÖRUÜY)-', 'B', 'B',
283		'BRINGEND-----^', 'BRI', 'BRI',
284		'BRINGEND-----', ' BRI', ' BRI',
285		'BROW(NS)-', 'BRAU', 'BRAU',
286		'BUDGET7', 'BÜGE', 'BIKE',
287		'BUFFET7', 'BÜFE', 'BIFE',
288		'BYLLE$', 'BILE', 'BILE',
289		'BYLL$', 'BIL', 'BIL',
290		'BYPA--^', 'BEI', 'BEI',
291		'BYTE<', 'BEIT', 'BEIT',
292		'BY9^', 'BÜ', None,
293		'B(SßZ)$', 'BS', None,
294		'CACH(EI)-^', 'KESH', 'KEZ',
295		'CAE--', 'Z', 'Z',
296		'CA(IY)$', 'ZEI', 'ZEI',
297		'CE(EIJUY)--', 'Z', 'Z',
298		'CENT<', 'ZENT', 'ZENT',
299		'CERST(EI)----^', 'KE', 'KE',
300		'CER$', 'ZA', 'ZA',
301		'CE3', 'ZE', 'ZE',
302		'CH\'S$', 'X', 'X',
303		'CH´S$', 'X', 'X',
304		'CHAO(ST)-', 'KAO', 'KAU',
305		'CHAMPIO-^', 'SHEMPI', 'ZENBI',
306		'CHAR(AI)-^', 'KAR', 'KAR',
307		'CHAU(CDFSVWXZ)-', 'SHO', 'ZU',
308		'CHÄ(CF)-', 'SHE', 'ZE',
309		'CHE(CF)-', 'SHE', 'ZE',
310		'CHEM-^', 'KE', 'KE', # or: 'CHE', 'KE'
311		'CHEQUE<', 'SHEK', 'ZEK',
312		'CHI(CFGPVW)-', 'SHI', 'ZI',
313		'CH(AEUY)-<^', 'SH', 'Z',
314		'CHK-', '', '',
315		'CHO(CKPS)-^', 'SHO', 'ZU',
316		'CHRIS-', 'KRI', None,
317		'CHRO-', 'KR', None,
318		'CH(LOR)-<^', 'K', 'K',
319		'CHST-', 'X', 'X',
320		'CH(SßXZ)3', 'X', 'X',
321		'CHTNI-3', 'CHN', 'KN',
322		'CH^', 'K', 'K', # or: 'CH', 'K'
323		'CH', 'CH', 'K',
324		'CIC$', 'ZIZ', 'ZIZ',
325		'CIENCEFICT----', 'EIENS ', 'EIENZ ',
326		'CIENCE$', 'EIENS', 'EIENZ',
327		'CIER$', 'ZIE', 'ZIE',
328		'CYB-^', 'ZEI', 'ZEI',
329		'CY9^', 'ZÜ', 'ZI',
330		'C(IJY)-<3', 'Z', 'Z',
331		'CLOWN-', 'KLAU', 'KLAU',
332		'CCH', 'Z', 'Z',
333		'CCE-', 'X', 'X',
334		'C(CK)-', '', '',
335		'CLAUDET---', 'KLO', 'KLU',
336		'CLAUDINE^$', 'KLODIN', 'KLUTIN',
337		'COACH', 'KOSH', 'KUZ',
338		'COLE$', 'KOL', 'KUL',
339		'COUCH', 'KAUSH', 'KAUZ',
340		'COW', 'KAU', 'KAU',
341		'CQUES$', 'K', 'K',
342		'CQUE', 'K', 'K',
343		'CRASH--9', 'KRE', 'KRE',
344		'CREAT-^', 'KREA', 'KREA',
345		'CST', 'XT', 'XT',
346		'CS<^', 'Z', 'Z',
347		'C(SßX)', 'X', 'X',
348		'CT\'S$', 'X', 'X',
349		'CT(SßXZ)', 'X', 'X',
350		'CZ<', 'Z', 'Z',
351		'C(ÈÉÊÌÍÎÝ)3', 'Z', 'Z',
352		'C.^', 'C.', 'C.',
353		'CÄ-', 'Z', 'Z',
354		'CÜ$', 'ZÜ', 'ZI',
355		'C\'S$', 'X', 'X',
356		'C<', 'K', 'K',
357		'DAHER^$', 'DAHER', None,
358		'DARAUFFOLGE-----', 'DARAUF ', 'TARAUF ',
359		'DAVO(NR)-^$', 'DAFO', 'TAFU',
360		'DD(SZ)--<', '', '',
361		'DD9', 'D', None,
362		'DEPOT7', 'DEPO', 'TEBU',
363		'DESIGN', 'DISEIN', 'TIZEIN',
364		'DE(LMNRST)-3^', 'DE', 'TE',
365		'DETTE$', 'DET', 'TET',
366		'DH$', 'T', None,
367		'DIC$', 'DIZ', 'TIZ',
368		'DIDR-^', 'DIT', None,
369		'DIEDR-^', 'DIT', None,
370		'DJ(AEIOU)-^', 'I', 'I',
371		'DMITR-^', 'DIMIT', 'TINIT',
372		'DRY9^', 'DRÜ', None,
373		'DT-', '', '',
374		'DUIS-^', 'DÜ', 'TI',
375		'DURCH^^', 'DURCH', 'TURK',
376		'DVA$', 'TWA', None,
377		'DY9^', 'DÜ', None,
378		'DYS$', 'DIS', None,
379		'DS(CH)--<', 'T', 'T',
380		'DST', 'ZT', 'ZT',
381		'DZS(CH)--', 'T', 'T',
382		'D(SßZ)', 'Z', 'Z',
383		'D(AÄEIOÖRUÜY)-', 'D', None,
384		'D(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'D', None,
385		'D\'H^', 'D', 'T',
386		'D´H^', 'D', 'T',
387		'D`H^', 'D', 'T',
388		'D\'S3$', 'Z', 'Z',
389		'D´S3$', 'Z', 'Z',
390		'D^', 'D', None,
391		'D', 'T', 'T',
392		'EAULT$', 'O', 'U',
393		'EAUX$', 'O', 'U',
394		'EAU', 'O', 'U',
395		'EAV', 'IW', 'IF',
396		'EAS3$', 'EAS', None,
397		'EA(AÄEIOÖÜY)-3', 'EA', 'EA',
398		'EA3$', 'EA', 'EA',
399		'EA3', 'I', 'I',
400		'EBENSO^$', 'EBNSO', 'EBNZU',
401		'EBENSO^^', 'EBNSO ', 'EBNZU ',
402		'EBEN^^', 'EBN', 'EBN',
403		'EE9', 'E', 'E',
404		'EGL-1', 'EK', None,
405		'EHE(IUY)--1', 'EH', None,
406		'EHUNG---1', 'E', None,
407		'EH(AÄIOÖUÜY)-1', 'EH', None,
408		'EIEI--', '', '',
409		'EIERE^$', 'EIERE', None,
410		'EIERE$', 'EIERE', None,
411		'EIERE(NS)-$', 'EIERE', None,
412		'EIERE(AIOUY)--', 'EIER', None,
413		'EIER(AÄIOÖUÜY)-', 'EIER', None,
414		'EIER<', 'EIA', None,
415		'EIGL-1', 'EIK', None,
416		'EIGH$', 'EI', 'EI',
417		'EIH--', 'E', 'E',
418		'EILLE$', 'EI', 'EI',
419		'EIR(BCDFGKLMNQSTVWZ)-', 'EIA', 'EIA',
420		'EIR$', 'EIA', 'EIA',
421		'EITRAUBEN------', 'EIT ', 'EIT ',
422		'EI', 'EI', 'EI',
423		'EJ$', 'EI', 'EI',
424		'ELIZ^', 'ELIS', None,
425		'ELZ^', 'ELS', None,
426		'EL-^', 'E', 'E',
427		'ELANG----1', 'E', 'E',
428		'EL(DKL)--1', 'E', 'E',
429		'EL(MNT)--1$', 'E', 'E',
430		'ELYNE$', 'ELINE', 'ELINE',
431		'ELYN$', 'ELIN', 'ELIN',
432		'EL(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'EL', 'EL',
433		'EL-1', 'L', 'L',
434		'EM-^', None, 'E',
435		'EM(DFKMPQT)--1', None, 'E',
436		'EM(AÄEÈÉÊIÌÍÎOÖUÜY)--1', None, 'E',
437		'EM-1', None, 'N',
438		'ENGAG-^', 'ANGA', 'ANKA',
439		'EN-^', 'E', 'E',
440		'ENTUEL', 'ENTUEL', None,
441		'EN(CDGKQSTZ)--1', 'E', 'E',
442		'EN(AÄEÈÉÊIÌÍÎNOÖUÜY)-1', 'EN', 'EN',
443		'EN-1', '', '',
444		'ERH(AÄEIOÖUÜ)-^', 'ERH', 'ER',
445		'ER-^', 'E', 'E',
446		'ERREGEND-----', ' ER', ' ER',
447		'ERT1$', 'AT', None,
448		'ER(DGLKMNRQTZß)-1', 'ER', None,
449		'ER(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'ER', 'A',
450		'ER1$', 'A', 'A',
451		'ER<1', 'A', 'A',
452		'ETAT7', 'ETA', 'ETA',
453		'ETI(AÄOÖÜU)-', 'EZI', 'EZI',
454		'EUERE$', 'EUERE', None,
455		'EUERE(NS)-$', 'EUERE', None,
456		'EUERE(AIOUY)--', 'EUER', None,
457		'EUER(AÄIOÖUÜY)-', 'EUER', None,
458		'EUER<', 'EUA', None,
459		'EUEU--', '', '',
460		'EUILLE$', 'Ö', 'Ö',
461		'EUR$', 'ÖR', 'ÖR',
462		'EUX', 'Ö', 'Ö',
463		'EUSZ$', 'EUS', None,
464		'EUTZ$', 'EUS', None,
465		'EUYS$', 'EUS', 'EUZ',
466		'EUZ$', 'EUS', None,
467		'EU', 'EU', 'EU',
468		'EVER--<1', 'EW', None,
469		'EV(ÄOÖUÜ)-1', 'EW', None,
470		'EYER<', 'EIA', 'EIA',
471		'EY<', 'EI', 'EI',
472		'FACETTE', 'FASET', 'FAZET',
473		'FANS--^$', 'FE', 'FE',
474		'FAN-^$', 'FE', 'FE',
475		'FAULT-', 'FOL', 'FUL',
476		'FEE(DL)-', 'FI', 'FI',
477		'FEHLER', 'FELA', 'FELA',
478		'FE(LMNRST)-3^', 'FE', 'FE',
479		'FOERDERN---^', 'FÖRD', 'FÖRT',
480		'FOERDERN---', ' FÖRD', ' FÖRT',
481		'FOND7', 'FON', 'FUN',
482		'FRAIN$', 'FRA', 'FRA',
483		'FRISEU(RS)-', 'FRISÖ', 'FRIZÖ',
484		'FY9^', 'FÜ', None,
485		'FÖRDERN---^', 'FÖRD', 'FÖRT',
486		'FÖRDERN---', ' FÖRD', ' FÖRT',
487		'GAGS^$', 'GEX', 'KEX',
488		'GAG^$', 'GEK', 'KEK',
489		'GD', 'KT', 'KT',
490		'GEGEN^^', 'GEGN', 'KEKN',
491		'GEGENGEKOM-----', 'GEGN ', 'KEKN ',
492		'GEGENGESET-----', 'GEGN ', 'KEKN ',
493		'GEGENKOMME-----', 'GEGN ', 'KEKN ',
494		'GEGENZUKOM---', 'GEGN ZU ', 'KEKN ZU ',
495		'GENDETWAS-----$', 'GENT ', 'KENT ',
496		'GENRE', 'IORE', 'IURE',
497		'GE(LMNRST)-3^', 'GE', 'KE',
498		'GER(DKT)-', 'GER', None,
499		'GETTE$', 'GET', 'KET',
500		'GGF.', 'GF.', None,
501		'GG-', '', '',
502		'GH', 'G', None,
503		'GI(AOU)-^', 'I', 'I',
504		'GION-3', 'KIO', 'KIU',
505		'G(CK)-', '', '',
506		'GJ(AEIOU)-^', 'I', 'I',
507		'GMBH^$', 'GMBH', 'GMBH',
508		'GNAC$', 'NIAK', 'NIAK',
509		'GNON$', 'NION', 'NIUN',
510		'GN$', 'N', 'N',
511		'GONCAL-^', 'GONZA', 'KUNZA',
512		'GRY9^', 'GRÜ', None,
513		'G(SßXZ)-<', 'K', 'K',
514		'GUCK-', 'KU', 'KU',
515		'GUISEP-^', 'IUSE', 'IUZE',
516		'GUI-^', 'G', 'K',
517		'GUTAUSSEH------^', 'GUT ', 'KUT ',
518		'GUTGEHEND------^', 'GUT ', 'KUT ',
519		'GY9^', 'GÜ', None,
520		'G(AÄEILOÖRUÜY)-', 'G', None,
521		'G(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'G', None,
522		'G\'S$', 'X', 'X',
523		'G´S$', 'X', 'X',
524		'G^', 'G', None,
525		'G', 'K', 'K',
526		'HA(HIUY)--1', 'H', None,
527		'HANDVOL---^', 'HANT ', 'ANT ',
528		'HANNOVE-^', 'HANOF', None,
529		'HAVEN7$', 'HAFN', None,
530		'HEAD-', 'HE', 'E',
531		'HELIEGEN------', 'E ', 'E ',
532		'HESTEHEN------', 'E ', 'E ',
533		'HE(LMNRST)-3^', 'HE', 'E',
534		'HE(LMN)-1', 'E', 'E',
535		'HEUR1$', 'ÖR', 'ÖR',
536		'HE(HIUY)--1', 'H', None,
537		'HIH(AÄEIOÖUÜY)-1', 'IH', None,
538		'HLH(AÄEIOÖUÜY)-1', 'LH', None,
539		'HMH(AÄEIOÖUÜY)-1', 'MH', None,
540		'HNH(AÄEIOÖUÜY)-1', 'NH', None,
541		'HOBBY9^', 'HOBI', None,
542		'HOCHBEGAB-----^', 'HOCH ', 'UK ',
543		'HOCHTALEN-----^', 'HOCH ', 'UK ',
544		'HOCHZUFRI-----^', 'HOCH ', 'UK ',
545		'HO(HIY)--1', 'H', None,
546		'HRH(AÄEIOÖUÜY)-1', 'RH', None,
547		'HUH(AÄEIOÖUÜY)-1', 'UH', None,
548		'HUIS^^', 'HÜS', 'IZ',
549		'HUIS$', 'ÜS', 'IZ',
550		'HUI--1', 'H', None,
551		'HYGIEN^', 'HÜKIEN', None,
552		'HY9^', 'HÜ', None,
553		'HY(BDGMNPST)-', 'Ü', None,
554		'H.^', None, 'H.',
555		'HÄU--1', 'H', None,
556		'H^', 'H', '',
557		'H', '', '',
558		'ICHELL---', 'ISH', 'IZ',
559		'ICHI$', 'ISHI', 'IZI',
560		'IEC$', 'IZ', 'IZ',
561		'IEDENSTELLE------', 'IDN ', 'ITN ',
562		'IEI-3', '', '',
563		'IELL3', 'IEL', 'IEL',
564		'IENNE$', 'IN', 'IN',
565		'IERRE$', 'IER', 'IER',
566		'IERZULAN---', 'IR ZU ', 'IR ZU ',
567		'IETTE$', 'IT', 'IT',
568		'IEU', 'IÖ', 'IÖ',
569		'IE<4', 'I', 'I',
570		'IGL-1', 'IK', None,
571		'IGHT3$', 'EIT', 'EIT',
572		'IGNI(EO)-', 'INI', 'INI',
573		'IGN(AEOU)-$', 'INI', 'INI',
574		'IHER(DGLKRT)--1', 'IHE', None,
575		'IHE(IUY)--', 'IH', None,
576		'IH(AIOÖUÜY)-', 'IH', None,
577		'IJ(AOU)-', 'I', 'I',
578		'IJ$', 'I', 'I',
579		'IJ<', 'EI', 'EI',
580		'IKOLE$', 'IKOL', 'IKUL',
581		'ILLAN(STZ)--4', 'ILIA', 'ILIA',
582		'ILLAR(DT)--4', 'ILIA', 'ILIA',
583		'IMSTAN----^', 'IM ', 'IN ',
584		'INDELERREGE------', 'INDL ', 'INTL ',
585		'INFRAGE-----^$', 'IN ', 'IN ',
586		'INTERN(AOU)-^', 'INTAN', 'INTAN',
587		'INVER-', 'INWE', 'INFE',
588		'ITI(AÄIOÖUÜ)-', 'IZI', 'IZI',
589		'IUSZ$', 'IUS', None,
590		'IUTZ$', 'IUS', None,
591		'IUZ$', 'IUS', None,
592		'IVER--<', 'IW', None,
593		'IVIER$', 'IWIE', 'IFIE',
594		'IV(ÄOÖUÜ)-', 'IW', None,
595		'IV<3', 'IW', None,
596		'IY2', 'I', None,
597		'I(ÈÉÊ)<4', 'I', 'I',
598		'JAVIE---<^', 'ZA', 'ZA',
599		'JEANS^$', 'JINS', 'INZ',
600		'JEANNE^$', 'IAN', 'IAN',
601		'JEAN-^', 'IA', 'IA',
602		'JER-^', 'IE', 'IE',
603		'JE(LMNST)-', 'IE', 'IE',
604		'JI^', 'JI', None,
605		'JOR(GK)^$', 'IÖRK', 'IÖRK',
606		'J', 'I', 'I',
607		'KC(ÄEIJ)-', 'X', 'X',
608		'KD', 'KT', None,
609		'KE(LMNRST)-3^', 'KE', 'KE',
610		'KG(AÄEILOÖRUÜY)-', 'K', None,
611		'KH<^', 'K', 'K',
612		'KIC$', 'KIZ', 'KIZ',
613		'KLE(LMNRST)-3^', 'KLE', 'KLE',
614		'KOTELE-^', 'KOTL', 'KUTL',
615		'KREAT-^', 'KREA', 'KREA',
616		'KRÜS(TZ)--^', 'KRI', None,
617		'KRYS(TZ)--^', 'KRI', None,
618		'KRY9^', 'KRÜ', None,
619		'KSCH---', 'K', 'K',
620		'KSH--', 'K', 'K',
621		'K(SßXZ)7', 'X', 'X', # implies 'KST' -> 'XT'
622		'KT\'S$', 'X', 'X',
623		'KTI(AIOU)-3', 'XI', 'XI',
624		'KT(SßXZ)', 'X', 'X',
625		'KY9^', 'KÜ', None,
626		'K\'S$', 'X', 'X',
627		'K´S$', 'X', 'X',
628		'LANGES$', ' LANGES', ' LANKEZ',
629		'LANGE$', ' LANGE', ' LANKE',
630		'LANG$', ' LANK', ' LANK',
631		'LARVE-', 'LARF', 'LARF',
632		'LD(SßZ)$', 'LS', 'LZ',
633		'LD\'S$', 'LS', 'LZ',
634		'LD´S$', 'LS', 'LZ',
635		'LEAND-^', 'LEAN', 'LEAN',
636		'LEERSTEHE-----^', 'LER ', 'LER ',
637		'LEICHBLEIB-----', 'LEICH ', 'LEIK ',
638		'LEICHLAUTE-----', 'LEICH ', 'LEIK ',
639		'LEIDERREGE------', 'LEIT ', 'LEIT ',
640		'LEIDGEPR----^', 'LEIT ', 'LEIT ',
641		'LEINSTEHE-----', 'LEIN ', 'LEIN ',
642		'LEL-', 'LE', 'LE',
643		'LE(MNRST)-3^', 'LE', 'LE',
644		'LETTE$', 'LET', 'LET',
645		'LFGNAG-', 'LFGAN', 'LFKAN',
646		'LICHERWEIS----', 'LICHA ', 'LIKA ',
647		'LIC$', 'LIZ', 'LIZ',
648		'LIVE^$', 'LEIF', 'LEIF',
649		'LT(SßZ)$', 'LS', 'LZ',
650		'LT\'S$', 'LS', 'LZ',
651		'LT´S$', 'LS', 'LZ',
652		'LUI(GS)--', 'LU', 'LU',
653		'LV(AIO)-', 'LW', None,
654		'LY9^', 'LÜ', None,
655		'LSTS$', 'LS', 'LZ',
656		'LZ(BDFGKLMNPQRSTVWX)-', 'LS', None,
657		'L(SßZ)$', 'LS', None,
658		'MAIR-<', 'MEI', 'NEI',
659		'MANAG-', 'MENE', 'NENE',
660		'MANUEL', 'MANUEL', None,
661		'MASSEU(RS)-', 'MASÖ', 'NAZÖ',
662		'MATCH', 'MESH', 'NEZ',
663		'MAURICE', 'MORIS', 'NURIZ',
664		'MBH^$', 'MBH', 'MBH',
665		'MB(ßZ)$', 'MS', None,
666		'MB(SßTZ)-', 'M', 'N',
667		'MCG9^', 'MAK', 'NAK',
668		'MC9^', 'MAK', 'NAK',
669		'MEMOIR-^', 'MEMOA', 'NENUA',
670		'MERHAVEN$', 'MAHAFN', None,
671		'ME(LMNRST)-3^', 'ME', 'NE',
672		'MEN(STZ)--3', 'ME', None,
673		'MEN$', 'MEN', None,
674		'MIGUEL-', 'MIGE', 'NIKE',
675		'MIKE^$', 'MEIK', 'NEIK',
676		'MITHILFE----^$', 'MIT H', 'NIT ',
677		'MN$', 'M', None,
678		'MN', 'N', 'N',
679		'MPJUTE-', 'MPUT', 'NBUT',
680		'MP(ßZ)$', 'MS', None,
681		'MP(SßTZ)-', 'M', 'N',
682		'MP(BDJLMNPQVW)-', 'MB', 'NB',
683		'MY9^', 'MÜ', None,
684		'M(ßZ)$', 'MS', None,
685		'M´G7^', 'MAK', 'NAK',
686		'M\'G7^', 'MAK', 'NAK',
687		'M´^', 'MAK', 'NAK',
688		'M\'^', 'MAK', 'NAK',
689		'M', None, 'N',
690		'NACH^^', 'NACH', 'NAK',
691		'NADINE', 'NADIN', 'NATIN',
692		'NAIV--', 'NA', 'NA',
693		'NAISE$', 'NESE', 'NEZE',
694		'NAUGENOMM------', 'NAU ', 'NAU ',
695		'NAUSOGUT$', 'NAUSO GUT', 'NAUZU KUT',
696		'NCH$', 'NSH', 'NZ',
697		'NCOISE$', 'SOA', 'ZUA',
698		'NCOIS$', 'SOA', 'ZUA',
699		'NDAR$', 'NDA', 'NTA',
700		'NDERINGEN------', 'NDE ', 'NTE ',
701		'NDRO(CDKTZ)-', 'NTRO', None,
702		'ND(BFGJLMNPQVW)-', 'NT', None,
703		'ND(SßZ)$', 'NS', 'NZ',
704		'ND\'S$', 'NS', 'NZ',
705		'ND´S$', 'NS', 'NZ',
706		'NEBEN^^', 'NEBN', 'NEBN',
707		'NENGELERN------', 'NEN ', 'NEN ',
708		'NENLERN(ET)---', 'NEN LE', 'NEN LE',
709		'NENZULERNE---', 'NEN ZU LE', 'NEN ZU LE',
710		'NE(LMNRST)-3^', 'NE', 'NE',
711		'NEN-3', 'NE', 'NE',
712		'NETTE$', 'NET', 'NET',
713		'NGU^^', 'NU', 'NU',
714		'NG(BDFJLMNPQRTVW)-', 'NK', 'NK',
715		'NH(AUO)-$', 'NI', 'NI',
716		'NICHTSAHNEN-----', 'NIX ', 'NIX ',
717		'NICHTSSAGE----', 'NIX ', 'NIX ',
718		'NICHTS^^', 'NIX', 'NIX',
719		'NICHT^^', 'NICHT', 'NIKT',
720		'NINE$', 'NIN', 'NIN',
721		'NON^^', 'NON', 'NUN',
722		'NOTLEIDE-----^', 'NOT ', 'NUT ',
723		'NOT^^', 'NOT', 'NUT',
724		'NTI(AIOU)-3', 'NZI', 'NZI',
725		'NTIEL--3', 'NZI', 'NZI',
726		'NT(SßZ)$', 'NS', 'NZ',
727		'NT\'S$', 'NS', 'NZ',
728		'NT´S$', 'NS', 'NZ',
729		'NYLON', 'NEILON', 'NEILUN',
730		'NY9^', 'NÜ', None,
731		'NSTZUNEH---', 'NST ZU ', 'NZT ZU ',
732		'NSZ-', 'NS', None,
733		'NSTS$', 'NS', 'NZ',
734		'NZ(BDFGKLMNPQRSTVWX)-', 'NS', None,
735		'N(SßZ)$', 'NS', None,
736		'OBERE-', 'OBER', None,
737		'OBER^^', 'OBA', 'UBA',
738		'OEU2', 'Ö', 'Ö',
739		'OE<2', 'Ö', 'Ö',
740		'OGL-', 'OK', None,
741		'OGNIE-', 'ONI', 'UNI',
742		'OGN(AEOU)-$', 'ONI', 'UNI',
743		'OH(AIOÖUÜY)-', 'OH', None,
744		'OIE$', 'Ö', 'Ö',
745		'OIRE$', 'OA', 'UA',
746		'OIR$', 'OA', 'UA',
747		'OIX', 'OA', 'UA',
748		'OI<3', 'EU', 'EU',
749		'OKAY^$', 'OKE', 'UKE',
750		'OLYN$', 'OLIN', 'ULIN',
751		'OO(DLMZ)-', 'U', None,
752		'OO$', 'U', None,
753		'OO-', '', '',
754		'ORGINAL-----', 'ORI', 'URI',
755		'OTI(AÄOÖUÜ)-', 'OZI', 'UZI',
756		'OUI^', 'WI', 'FI',
757		'OUILLE$', 'ULIE', 'ULIE',
758		'OU(DT)-^', 'AU', 'AU',
759		'OUSE$', 'AUS', 'AUZ',
760		'OUT-', 'AU', 'AU',
761		'OU', 'U', 'U',
762		'O(FV)$', 'AU', 'AU', # due to 'OW$' -> 'AU'
763		'OVER--<', 'OW', None,
764		'OV(AOU)-', 'OW', None,
765		'OW$', 'AU', 'AU',
766		'OWS$', 'OS', 'UZ',
767		'OJ(AÄEIOÖUÜ)--', 'O', 'U',
768		'OYER', 'OIA', None,
769		'OY(AÄEIOÖUÜ)--', 'O', 'U',
770		'O(JY)<', 'EU', 'EU',
771		'OZ$', 'OS', None,
772		'O´^', 'O', 'U',
773		'O\'^', 'O', 'U',
774		'O', None, 'U',
775		'PATIEN--^', 'PAZI', 'PAZI',
776		'PENSIO-^', 'PANSI', 'PANZI',
777		'PE(LMNRST)-3^', 'PE', 'PE',
778		'PFER-^', 'FE', 'FE',
779		'P(FH)<', 'F', 'F',
780		'PIC^$', 'PIK', 'PIK',
781		'PIC$', 'PIZ', 'PIZ',
782		'PIPELINE', 'PEIBLEIN', 'PEIBLEIN',
783		'POLYP-', 'POLÜ', None,
784		'POLY^^', 'POLI', 'PULI',
785		'PORTRAIT7', 'PORTRE', 'PURTRE',
786		'POWER7', 'PAUA', 'PAUA',
787		'PP(FH)--<', 'B', 'B',
788		'PP-', '', '',
789		'PRODUZ-^', 'PRODU', 'BRUTU',
790		'PRODUZI--', ' PRODU', ' BRUTU',
791		'PRIX^$', 'PRI', 'PRI',
792		'PS-^^', 'P', None,
793		'P(SßZ)^', None, 'Z',
794		'P(SßZ)$', 'BS', None,
795		'PT-^', '', '',
796		'PTI(AÄOÖUÜ)-3', 'BZI', 'BZI',
797		'PY9^', 'PÜ', None,
798		'P(AÄEIOÖRUÜY)-', 'P', 'P',
799		'P(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'P', None,
800		'P.^', None, 'P.',
801		'P^', 'P', None,
802		'P', 'B', 'B',
803		'QI-', 'Z', 'Z',
804		'QUARANT--', 'KARA', 'KARA',
805		'QUE(LMNRST)-3', 'KWE', 'KFE',
806		'QUE$', 'K', 'K',
807		'QUI(NS)$', 'KI', 'KI',
808		'QUIZ7', 'KWIS', None,
809		'Q(UV)7', 'KW', 'KF',
810		'Q<', 'K', 'K',
811		'RADFAHR----', 'RAT ', 'RAT ',
812		'RAEFTEZEHRE-----', 'REFTE ', 'REFTE ',
813		'RCH', 'RCH', 'RK',
814		'REA(DU)---3^', 'R', None,
815		'REBSERZEUG------', 'REBS ', 'REBZ ',
816		'RECHERCH^', 'RESHASH', 'REZAZ',
817		'RECYCL--', 'RIZEI', 'RIZEI',
818		'RE(ALST)-3^', 'RE', None,
819		'REE$', 'RI', 'RI',
820		'RER$', 'RA', 'RA',
821		'RE(MNR)-4', 'RE', 'RE',
822		'RETTE$', 'RET', 'RET',
823		'REUZ$', 'REUZ', None,
824		'REW$', 'RU', 'RU',
825		'RH<^', 'R', 'R',
826		'RJA(MN)--', 'RI', 'RI',
827		'ROWD-^', 'RAU', 'RAU',
828		'RTEMONNAIE-', 'RTMON', 'RTNUN',
829		'RTI(AÄOÖUÜ)-3', 'RZI', 'RZI',
830		'RTIEL--3', 'RZI', 'RZI',
831		'RV(AEOU)-3', 'RW', None,
832		'RY(KN)-$', 'RI', 'RI',
833		'RY9^', 'RÜ', None,
834		'RÄFTEZEHRE-----', 'REFTE ', 'REFTE ',
835		'SAISO-^', 'SES', 'ZEZ',
836		'SAFE^$', 'SEIF', 'ZEIF',
837		'SAUCE-^', 'SOS', 'ZUZ',
838		'SCHLAGGEBEN-----<', 'SHLAK ', 'ZLAK ',
839		'SCHSCH---7', '', '',
840		'SCHTSCH', 'SH', 'Z',
841		'SC(HZ)<', 'SH', 'Z',
842		'SC', 'SK', 'ZK',
843		'SELBSTST--7^^', 'SELB', 'ZELB',
844		'SELBST7^^', 'SELBST', 'ZELBZT',
845		'SERVICE7^', 'SÖRWIS', 'ZÖRFIZ',
846		'SERVI-^', 'SERW', None,
847		'SE(LMNRST)-3^', 'SE', 'ZE',
848		'SETTE$', 'SET', 'ZET',
849		'SHP-^', 'S', 'Z',
850		'SHST', 'SHT', 'ZT',
851		'SHTSH', 'SH', 'Z',
852		'SHT', 'ST', 'Z',
853		'SHY9^', 'SHÜ', None,
854		'SH^^', 'SH', None,
855		'SH3', 'SH', 'Z',
856		'SICHERGEGAN-----^', 'SICHA ', 'ZIKA ',
857		'SICHERGEHE----^', 'SICHA ', 'ZIKA ',
858		'SICHERGESTEL------^', 'SICHA ', 'ZIKA ',
859		'SICHERSTELL-----^', 'SICHA ', 'ZIKA ',
860		'SICHERZU(GS)--^', 'SICHA ZU ', 'ZIKA ZU ',
861		'SIEGLI-^', 'SIKL', 'ZIKL',
862		'SIGLI-^', 'SIKL', 'ZIKL',
863		'SIGHT', 'SEIT', 'ZEIT',
864		'SIGN', 'SEIN', 'ZEIN',
865		'SKI(NPZ)-', 'SKI', 'ZKI',
866		'SKI<^', 'SHI', 'ZI',
867		'SODASS^$', 'SO DAS', 'ZU TAZ',
868		'SODAß^$', 'SO DAS', 'ZU TAZ',
869		'SOGENAN--^', 'SO GEN', 'ZU KEN',
870		'SOUND-', 'SAUN', 'ZAUN',
871		'STAATS^^', 'STAZ', 'ZTAZ',
872		'STADT^^', 'STAT', 'ZTAT',
873		'STANDE$', ' STANDE', ' ZTANTE',
874		'START^^', 'START', 'ZTART',
875		'STAURANT7', 'STORAN', 'ZTURAN',
876		'STEAK-', 'STE', 'ZTE',
877		'STEPHEN-^$', 'STEW', None,
878		'STERN', 'STERN', None,
879		'STRAF^^', 'STRAF', 'ZTRAF',
880		'ST\'S$', 'Z', 'Z',
881		'ST´S$', 'Z', 'Z',
882		'STST--', '', '',
883		'STS(ACEÈÉÊHIÌÍÎOUÄÜÖ)--', 'ST', 'ZT',
884		'ST(SZ)', 'Z', 'Z',
885		'SPAREN---^', 'SPA', 'ZPA',
886		'SPAREND----', ' SPA', ' ZPA',
887		'S(PTW)-^^', 'S', None,
888		'SP', 'SP', None,
889		'STYN(AE)-$', 'STIN', 'ZTIN',
890		'ST', 'ST', 'ZT',
891		'SUITE<', 'SIUT', 'ZIUT',
892		'SUKE--$', 'S', 'Z',
893		'SURF(EI)-', 'SÖRF', 'ZÖRF',
894		'SV(AEÈÉÊIÌÍÎOU)-<^', 'SW', None,
895		'SYB(IY)--^', 'SIB', None,
896		'SYL(KVW)--^', 'SI', None,
897		'SY9^', 'SÜ', None,
898		'SZE(NPT)-^', 'ZE', 'ZE',
899		'SZI(ELN)-^', 'ZI', 'ZI',
900		'SZCZ<', 'SH', 'Z',
901		'SZT<', 'ST', 'ZT',
902		'SZ<3', 'SH', 'Z',
903		'SÜL(KVW)--^', 'SI', None,
904		'S', None, 'Z',
905		'TCH', 'SH', 'Z',
906		'TD(AÄEIOÖRUÜY)-', 'T', None,
907		'TD(ÀÁÂÃÅÈÉÊËÌÍÎÏÒÓÔÕØÙÚÛÝŸ)-', 'T', None,
908		'TEAT-^', 'TEA', 'TEA',
909		'TERRAI7^', 'TERA', 'TERA',
910		'TE(LMNRST)-3^', 'TE', 'TE',
911		'TH<', 'T', 'T',
912		'TICHT-', 'TIK', 'TIK',
913		'TICH$', 'TIK', 'TIK',
914		'TIC$', 'TIZ', 'TIZ',
915		'TIGGESTELL-------', 'TIK ', 'TIK ',
916		'TIGSTELL-----', 'TIK ', 'TIK ',
917		'TOAS-^', 'TO', 'TU',
918		'TOILET-', 'TOLE', 'TULE',
919		'TOIN-', 'TOA', 'TUA',
920		'TRAECHTI-^', 'TRECHT', 'TREKT',
921		'TRAECHTIG--', ' TRECHT', ' TREKT',
922		'TRAINI-', 'TREN', 'TREN',
923		'TRÄCHTI-^', 'TRECHT', 'TREKT',
924		'TRÄCHTIG--', ' TRECHT', ' TREKT',
925		'TSCH', 'SH', 'Z',
926		'TSH', 'SH', 'Z',
927		'TST', 'ZT', 'ZT',
928		'T(Sß)', 'Z', 'Z',
929		'TT(SZ)--<', '', '',
930		'TT9', 'T', 'T',
931		'TV^$', 'TV', 'TV',
932		'TX(AEIOU)-3', 'SH', 'Z',
933		'TY9^', 'TÜ', None,
934		'TZ-', '', '',
935		'T\'S3$', 'Z', 'Z',
936		'T´S3$', 'Z', 'Z',
937		'UEBEL(GNRW)-^^', 'ÜBL ', 'IBL ',
938		'UEBER^^', 'ÜBA', 'IBA',
939		'UE2', 'Ü', 'I',
940		'UGL-', 'UK', None,
941		'UH(AOÖUÜY)-', 'UH', None,
942		'UIE$', 'Ü', 'I',
943		'UM^^', 'UM', 'UN',
944		'UNTERE--3', 'UNTE', 'UNTE',
945		'UNTER^^', 'UNTA', 'UNTA',
946		'UNVER^^', 'UNFA', 'UNFA',
947		'UN^^', 'UN', 'UN',
948		'UTI(AÄOÖUÜ)-', 'UZI', 'UZI',
949		'UVE-4', 'UW', None,
950		'UY2', 'UI', None,
951		'UZZ', 'AS', 'AZ',
952		'VACL-^', 'WAZ', 'FAZ',
953		'VAC$', 'WAZ', 'FAZ',
954		'VAN DEN ^', 'FANDN', 'FANTN',
955		'VANES-^', 'WANE', None,
956		'VATRO-', 'WATR', None,
957		'VA(DHJNT)--^', 'F', None,
958		'VEDD-^', 'FE', 'FE',
959		'VE(BEHIU)--^', 'F', None,
960		'VEL(BDLMNT)-^', 'FEL', None,
961		'VENTZ-^', 'FEN', None,
962		'VEN(NRSZ)-^', 'FEN', None,
963		'VER(AB)-^$', 'WER', None,
964		'VERBAL^$', 'WERBAL', None,
965		'VERBAL(EINS)-^', 'WERBAL', None,
966		'VERTEBR--', 'WERTE', None,
967		'VEREIN-----', 'F', None,
968		'VEREN(AEIOU)-^', 'WEREN', None,
969		'VERIFI', 'WERIFI', None,
970		'VERON(AEIOU)-^', 'WERON', None,
971		'VERSEN^', 'FERSN', 'FAZN',
972		'VERSIERT--^', 'WERSI', None,
973		'VERSIO--^', 'WERS', None,
974		'VERSUS', 'WERSUS', None,
975		'VERTI(GK)-', 'WERTI', None,
976		'VER^^', 'FER', 'FA',
977		'VERSPRECHE-------', ' FER', ' FA',
978		'VER$', 'WA', None,
979		'VER', 'FA', 'FA',
980		'VET(HT)-^', 'FET', 'FET',
981		'VETTE$', 'WET', 'FET',
982		'VE^', 'WE', None,
983		'VIC$', 'WIZ', 'FIZ',
984		'VIELSAGE----', 'FIL ', 'FIL ',
985		'VIEL', 'FIL', 'FIL',
986		'VIEW', 'WIU', 'FIU',
987		'VILL(AE)-', 'WIL', None,
988		'VIS(ACEIKUVWZ)-<^', 'WIS', None,
989		'VI(ELS)--^', 'F', None,
990		'VILLON--', 'WILI', 'FILI',
991		'VIZE^^', 'FIZE', 'FIZE',
992		'VLIE--^', 'FL', None,
993		'VL(AEIOU)--', 'W', None,
994		'VOKA-^', 'WOK', None,
995		'VOL(ATUVW)--^', 'WO', None,
996		'VOR^^', 'FOR', 'FUR',
997		'VR(AEIOU)--', 'W', None,
998		'VV9', 'W', None,
999		'VY9^', 'WÜ', 'FI',
1000		'V(ÜY)-', 'W', None,
1001		'V(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'W', None,
1002		'V(AEIJLRU)-<', 'W', None,
1003		'V.^', 'V.', None,
1004		'V<', 'F', 'F',
1005		'WEITERENTWI-----^', 'WEITA ', 'FEITA ',
1006		'WEITREICH-----^', 'WEIT ', 'FEIT ',
1007		'WEITVER^', 'WEIT FER', 'FEIT FA',
1008		'WE(LMNRST)-3^', 'WE', 'FE',
1009		'WER(DST)-', 'WER', None,
1010		'WIC$', 'WIZ', 'FIZ',
1011		'WIEDERU--', 'WIDE', 'FITE',
1012		'WIEDER^$', 'WIDA', 'FITA',
1013		'WIEDER^^', 'WIDA ', 'FITA ',
1014		'WIEVIEL', 'WI FIL', 'FI FIL',
1015		'WISUEL', 'WISUEL', None,
1016		'WR-^', 'W', None,
1017		'WY9^', 'WÜ', 'FI',
1018		'W(BDFGJKLMNPQRSTZ)-', 'F', None,
1019		'W$', 'F', None,
1020		'W', None, 'F',
1021		'X<^', 'Z', 'Z',
1022		'XHAVEN$', 'XAFN', None,
1023		'X(CSZ)', 'X', 'X',
1024		'XTS(CH)--', 'XT', 'XT',
1025		'XT(SZ)', 'Z', 'Z',
1026		'YE(LMNRST)-3^', 'IE', 'IE',
1027		'YE-3', 'I', 'I',
1028		'YOR(GK)^$', 'IÖRK', 'IÖRK',
1029		'Y(AOU)-<7', 'I', 'I',
1030		'Y(BKLMNPRSTX)-1', 'Ü', None,
1031		'YVES^$', 'IF', 'IF',
1032		'YVONNE^$', 'IWON', 'IFUN',
1033		'Y.^', 'Y.', None,
1034		'Y', 'I', 'I',
1035		'ZC(AOU)-', 'SK', 'ZK',
1036		'ZE(LMNRST)-3^', 'ZE', 'ZE',
1037		'ZIEJ$', 'ZI', 'ZI',
1038		'ZIGERJA(HR)-3', 'ZIGA IA', 'ZIKA IA',
1039		'ZL(AEIOU)-', 'SL', None,
1040		'ZS(CHT)--', '', '',
1041		'ZS', 'SH', 'Z',
1042		'ZUERST', 'ZUERST', 'ZUERST',
1043		'ZUGRUNDE^$', 'ZU GRUNDE', 'ZU KRUNTE',
1044		'ZUGRUNDE', 'ZU GRUNDE ', 'ZU KRUNTE ',
1045		'ZUGUNSTEN', 'ZU GUNSTN', 'ZU KUNZTN',
1046		'ZUHAUSE-', 'ZU HAUS', 'ZU AUZ',
1047		'ZULASTEN^$', 'ZU LASTN', 'ZU LAZTN',
1048		'ZURUECK^^', 'ZURÜK', 'ZURIK',
1049		'ZURZEIT', 'ZUR ZEIT', 'ZUR ZEIT',
1050		'ZURÜCK^^', 'ZURÜK', 'ZURIK',
1051		'ZUSTANDE', 'ZU STANDE', 'ZU ZTANTE',
1052		'ZUTAGE', 'ZU TAGE', 'ZU TAKE',
1053		'ZUVER^^', 'ZUFA', 'ZUFA',
1054		'ZUVIEL', 'ZU FIL', 'ZU FIL',
1055		'ZUWENIG', 'ZU WENIK', 'ZU FENIK',
1056		'ZY9^', 'ZÜ', None,
1057		'ZYK3$', 'ZIK', None,
1058		'Z(VW)7^', 'SW', None,
1059		None, None, None
1060		# fmt: on
1061		)
1062
# Translation table handed to ``str.translate`` when upper-casing input:
# maps the code point of each lower-case letter to its upper-case
# counterpart. Note that 'ß' is mapped to itself.
_upper_trans = {
    ord(lower): upper
    for lower, upper in zip(
        'abcdefghijklmnopqrstuvwxyzàáâãåäæ' + 'çðèéêëìíîïñòóôõöøœšßþùúûüýÿ',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÅÄÆ' + 'ÇÐÈÉÊËÌÍÎÏÑÒÓÔÕÖØŒŠßÞÙÚÛÜÝŸ',
    )
}
1074
def encode(self, word, mode=1, lang='de'):
    """Return the phonet code for a word.

    The rule table (``self._rules_german`` or ``self._rules_no_lang``) is
    a flat sequence read in groups of three: a search pattern followed by
    its replacement for mode 1 and its replacement for mode 2. The lookup
    tables used to find candidate rules are rebuilt on every call.

    Parameters
    ----------
    word : str
        The word to transform
    mode : int
        The phonet variant to employ (1 or 2)
    lang : str
        ``de`` (default) for German, ``none`` for no language

    Returns
    -------
    str
        The phonet value

    Examples
    --------
    >>> pe = Phonet()
    >>> pe.encode('Christopher')
    'KRISTOFA'
    >>> pe.encode('Niall')
    'NIAL'
    >>> pe.encode('Smith')
    'SMIT'
    >>> pe.encode('Schmidt')
    'SHMIT'

    >>> pe.encode('Christopher', mode=2)
    'KRIZTUFA'
    >>> pe.encode('Niall', mode=2)
    'NIAL'
    >>> pe.encode('Smith', mode=2)
    'ZNIT'
    >>> pe.encode('Schmidt', mode=2)
    'ZNIT'

    >>> pe.encode('Christopher', lang='none')
    'CHRISTOPHER'
    >>> pe.encode('Niall', lang='none')
    'NIAL'
    >>> pe.encode('Smith', lang='none')
    'SMITH'
    >>> pe.encode('Schmidt', lang='none')
    'SCHMIDT'

    """
    # Lookup tables shared by the two nested helpers below. They are
    # Counters, so reading a key that was never assigned yields 0.
    #
    # phonet_hash: first character -> index of the first rule starting
    #     with that character that has a replacement, or -1.
    # alpha_pos: 1 for the accented letters/umlauts listed below,
    #     2..27 for 'A'..'Z'; any other character reads as 0.
    phonet_hash = Counter()
    alpha_pos = Counter()

    # phonet_hash_1 / phonet_hash_2: keyed by (index of first letter in
    # 'A'..'Z', alpha_pos of the second letter); they hold the first and
    # the last rule index of a run of candidate rules. Second index 0
    # collects the rules that are tried for any following letter.
    phonet_hash_1 = Counter()
    phonet_hash_2 = Counter()

    def _initialize_phonet(lang):
        """Initialize phonet variables.

        Fills ``phonet_hash``, ``alpha_pos``, ``phonet_hash_1`` and
        ``phonet_hash_2`` from the rule table selected by ``lang``.

        Parameters
        ----------
        lang : str
            Language to use for rules

        """
        if lang == 'none':
            _phonet_rules = self._rules_no_lang
        else:
            _phonet_rules = self._rules_german

        phonet_hash[''] = -1

        # German and international umlauts
        for j in {
            'À',
            'Á',
            'Â',
            'Ã',
            'Ä',
            'Å',
            'Æ',
            'Ç',
            'È',
            'É',
            'Ê',
            'Ë',
            'Ì',
            'Í',
            'Î',
            'Ï',
            'Ð',
            'Ñ',
            'Ò',
            'Ó',
            'Ô',
            'Õ',
            'Ö',
            'Ø',
            'Ù',
            'Ú',
            'Û',
            'Ü',
            'Ý',
            'Þ',
            'ß',
            'Œ',
            'Š',
            'Ÿ',
        }:
            alpha_pos[j] = 1
            phonet_hash[j] = -1

        # "normal" letters ('A'-'Z')
        for i, j in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
            alpha_pos[j] = i + 2
            phonet_hash[j] = -1

        # mark every (first letter, second letter position) slot as empty
        for i in range(26):
            for j in range(28):
                phonet_hash_1[i, j] = -1
                phonet_hash_2[i, j] = -1

        # for each phonetic rule
        for i in range(len(_phonet_rules)):
            rule = _phonet_rules[i]

            # only every third entry is a search pattern; the two entries
            # after it are the replacements
            if rule and i % 3 == 0:
                # calculate first hash value
                k = _phonet_rules[i][0]

                # remember the first rule for this character that has at
                # least one replacement
                if phonet_hash[k] < 0 and (
                    _phonet_rules[i + 1] or _phonet_rules[i + 2]
                ):
                    phonet_hash[k] = i

                # calculate second hash values
                if k and alpha_pos[k] >= 2:
                    k = alpha_pos[k]

                    j = k - 2
                    rule = rule[1:]

                    # reduce the pattern to the character(s) allowed in
                    # second position: a blank if there are none, the
                    # contents of a leading "(...)" group, or the single
                    # next character
                    if not rule:
                        rule = ' '
                    elif rule[0] == '(':
                        rule = rule[1:]
                    else:
                        rule = rule[0]

                    while rule and (rule[0] != ')'):
                        k = alpha_pos[rule[0]]

                        if k > 0:
                            # add hash value for this letter
                            if phonet_hash_1[j, k] < 0:
                                phonet_hash_1[j, k] = i
                                phonet_hash_2[j, k] = i

                            # extend the run only if this rule is within
                            # 30 entries of its current end; otherwise
                            # fall through to the "all letters" slot
                            if phonet_hash_2[j, k] >= (i - 30):
                                phonet_hash_2[j, k] = i
                            else:
                                k = -1

                        if k <= 0:
                            # add hash value for all letters
                            if phonet_hash_1[j, 0] < 0:
                                phonet_hash_1[j, 0] = i

                            phonet_hash_2[j, 0] = i

                        rule = rule[1:]

    def _phonet(term, mode, lang):
        """Return the phonet coded form of a term.

        Walks through the upper-cased term and, at each position, tries
        the candidate rules for the current character. The replacement
        of the first rule that matches is either written to the output
        or, for patterns containing ``<``, written back into the working
        copy of the input, which is then examined again.

        Parameters
        ----------
        term : str
            Term to transform
        mode : int
            The phonet variant to employ (1 or 2)
        lang : str
            ``de`` (default) for German, ``none`` for no language

        Returns
        -------
        str
            The phonet value

        """
        if lang == 'none':
            _phonet_rules = self._rules_no_lang
        else:
            _phonet_rules = self._rules_german

        char0 = ''
        # the output is built in place over a copy of the input and cut
        # to its final length (j) at the end
        dest = term

        if not term:
            return ''

        term_length = len(term)

        # convert input string to upper-case
        src = term.translate(self._upper_trans)

        # check "src"
        i = 0  # read position in src
        j = 0  # write position in dest
        zeta = 0  # non-zero right after a '<' rule rewrote src in place

        while i < len(src):
            char = src[i]

            pos = alpha_pos[char]

            if pos >= 2:
                # 'A'..'Z': look the candidate rules up by the current
                # and the following letter
                xpos = pos - 2

                if i + 1 == len(src):
                    # no following character; alpha_pos[''] is never
                    # assigned, so this reads as 0
                    pos = alpha_pos['']
                else:
                    pos = alpha_pos[src[i + 1]]

                start1 = phonet_hash_1[xpos, pos]
                start2 = phonet_hash_1[xpos, 0]
                end1 = phonet_hash_2[xpos, pos]
                end2 = phonet_hash_2[xpos, 0]

                # preserve rule priorities
                # (swap the two runs so that the one starting earlier in
                # the table is tried first)
                if (start2 >= 0) and ((start1 < 0) or (start2 < start1)):
                    pos = start1
                    start1 = start2
                    start2 = pos
                    pos = end1
                    end1 = end2
                    end2 = pos

                # the second run starts inside the first one: merge them
                if (end1 >= start2) and (start2 >= 0):
                    if end2 > end1:
                        end1 = end2

                    start2 = -1
                    end2 = -1
            else:
                # any other character: a single run starting at the first
                # rule for this character, with no effective upper bound
                pos = phonet_hash[char]
                start1 = pos
                end1 = 10000
                start2 = -1
                end2 = -1

            pos = start1
            zeta0 = 0  # set to 1 once a rule has handled this position

            if pos >= 0:
                # check rules for this char
                while (_phonet_rules[pos] is None) or (
                    _phonet_rules[pos][0] == char
                ):
                    if pos > end1:
                        # first run exhausted: continue with the second
                        # run if there is one, otherwise give up
                        if start2 > 0:
                            pos = start2
                            start1 = start2
                            start2 = -1
                            end1 = end2
                            end2 = -1
                            continue

                        break

                    if (_phonet_rules[pos] is None) or (
                        _phonet_rules[pos + mode] is None
                    ):
                        # no conversion rule available
                        pos += 3
                        continue

                    # check whole string
                    matches = 1  # number of matching letters
                    priority = 5  # default priority
                    rule = _phonet_rules[pos]
                    rule = rule[1:]

                    # consume the literal letters of the pattern
                    while (
                        rule
                        and (len(src) > (i + matches))
                        and (src[i + matches] == rule[0])
                        and not rule[0].isdigit()
                        and (rule not in '(-<^$')
                    ):
                        matches += 1
                        rule = rule[1:]

                    if rule and (rule[0] == '('):
                        # check an array of letters
                        if (
                            (len(src) > (i + matches))
                            and src[i + matches].isalpha()
                            and (src[i + matches] in rule[1:])
                        ):
                            matches += 1

                            while rule and rule[0] != ')':
                                rule = rule[1:]

                            # if rule[0] == ')':
                            # drop the closing ')' of the group
                            rule = rule[1:]

                    # remember the first control character after the
                    # letters; it is compared with '-' further down
                    if rule:
                        priority0 = ord(rule[0])
                    else:
                        priority0 = 0

                    matches0 = matches

                    # each '-' gives back one matched letter (but never
                    # the first one)
                    while rule and rule[0] == '-' and matches > 1:
                        matches -= 1
                        rule = rule[1:]

                    if rule and rule[0] == '<':
                        rule = rule[1:]

                    if rule and rule[0].isdigit():
                        # read priority
                        priority = int(rule[0])
                        rule = rule[1:]

                    # "^^" is checked like a single "^" here; its extra
                    # effect is applied after the replacement below
                    if rule and rule[0:2] == '^^':
                        rule = rule[1:]

                    # The pattern matches if nothing is left of it, or
                    # if what is left is satisfied:
                    #   '^'  - no letter before position i; when followed
                    #          by '$', additionally neither a letter nor
                    #          '.' after the matched letters
                    #   '$'  - a letter before position i and neither a
                    #          letter nor '.' after the matched letters
                    if (
                        not rule
                        or (
                            (rule[0] == '^')
                            and ((i == 0) or not src[i - 1].isalpha())
                            and (
                                (rule[1:2] != '$')
                                or (
                                    not (
                                        src[
                                            i + matches0 : i + matches0 + 1
                                        ].isalpha()
                                    )
                                    and (
                                        src[
                                            i + matches0 : i + matches0 + 1
                                        ]
                                        != '.'
                                    )
                                )
                            )
                        )
                        or (
                            (rule[0] == '$')
                            and (i > 0)
                            and src[i - 1].isalpha()
                            and (
                                (
                                    not src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha()
                                )
                                and (
                                    src[i + matches0 : i + matches0 + 1]
                                    != '.'
                                )
                            )
                        )
                    ):
                        # look for continuation, if:
                        # matches > 1 and NO '-' in first string
                        pos0 = -1

                        start3 = 0
                        start4 = 0
                        end3 = 0
                        end4 = 0

                        if (
                            (matches > 1)
                            and src[i + matches : i + matches + 1]
                            and (priority0 != ord('-'))
                        ):
                            # set up the candidate runs for the last
                            # matched letter, in the same way as for
                            # "char" at the top of the outer loop
                            char0 = src[i + matches - 1]
                            pos0 = alpha_pos[char0]

                            if pos0 >= 2 and src[i + matches]:
                                xpos = pos0 - 2
                                pos0 = alpha_pos[src[i + matches]]
                                start3 = phonet_hash_1[xpos, pos0]
                                start4 = phonet_hash_1[xpos, 0]
                                end3 = phonet_hash_2[xpos, pos0]
                                end4 = phonet_hash_2[xpos, 0]

                                # preserve rule priorities
                                if (start4 >= 0) and (
                                    (start3 < 0) or (start4 < start3)
                                ):
                                    pos0 = start3
                                    start3 = start4
                                    start4 = pos0
                                    pos0 = end3
                                    end3 = end4
                                    end4 = pos0

                                if (end3 >= start4) and (start4 >= 0):
                                    if end4 > end3:
                                        end3 = end4

                                    start4 = -1
                                    end4 = -1
                            else:
                                pos0 = phonet_hash[char0]
                                start3 = pos0
                                end3 = 10000
                                start4 = -1
                                end4 = -1

                            pos0 = start3

                        # check continuation rules for src[i+matches]
                        if pos0 >= 0:
                            while (_phonet_rules[pos0] is None) or (
                                _phonet_rules[pos0][0] == char0
                            ):
                                if pos0 > end3:
                                    if start4 > 0:
                                        pos0 = start4
                                        start3 = start4
                                        start4 = -1
                                        end3 = end4
                                        end4 = -1
                                        continue

                                    # no continuation rule found; -1 makes
                                    # the priority test after the loop fail
                                    priority0 = -1

                                    # important
                                    break

                                if (_phonet_rules[pos0] is None) or (
                                    _phonet_rules[pos0 + mode] is None
                                ):
                                    # no conversion rule available
                                    pos0 += 3
                                    continue

                                # check whole string
                                matches0 = matches
                                priority0 = 5
                                rule = _phonet_rules[pos0]
                                rule = rule[1:]

                                while (
                                    rule
                                    and (
                                        src[
                                            i + matches0 : i + matches0 + 1
                                        ]
                                        == rule[0]
                                    )
                                    and (
                                        not rule[0].isdigit()
                                        or (rule in '(-<^$')
                                    )
                                ):
                                    matches0 += 1
                                    rule = rule[1:]

                                if rule and rule[0] == '(':
                                    # check an array of letters
                                    if src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha() and (
                                        src[i + matches0] in rule[1:]
                                    ):
                                        matches0 += 1

                                        while rule and rule[0] != ')':
                                            rule = rule[1:]

                                        # if rule[0] == ')':
                                        # drop the closing ')' of the group
                                        rule = rule[1:]

                                while rule and rule[0] == '-':
                                    # "matches0" is NOT decremented
                                    # because of
                                    # "if (matches0 == matches)"
                                    rule = rule[1:]

                                if rule and rule[0] == '<':
                                    rule = rule[1:]

                                if rule and rule[0].isdigit():
                                    priority0 = int(rule[0])
                                    rule = rule[1:]

                                if (
                                    not rule
                                    or
                                    # rule == '^' is not possible here
                                    (
                                        (rule[0] == '$')
                                        and not src[
                                            i + matches0 : i + matches0 + 1
                                        ].isalpha()
                                        and (
                                            src[
                                                i
                                                + matches0 : i
                                                + matches0
                                                + 1
                                            ]
                                            != '.'
                                        )
                                    )
                                ):
                                    if matches0 == matches:
                                        # this is only a partial string
                                        pos0 += 3
                                        continue

                                    if priority0 < priority:
                                        # priority is too low
                                        pos0 += 3
                                        continue

                                    # continuation rule found
                                    break

                                pos0 += 3

                            # end of "while"
                            # a continuation rule of at least the same
                            # priority was found: skip the current rule
                            if (priority0 >= priority) and (
                                (_phonet_rules[pos0] is not None)
                                and (_phonet_rules[pos0][0] == char0)
                            ):

                                pos += 3
                                continue

                        # replace string
                        # (priority0 is reused as a flag from here on:
                        # 1 if the pattern contains '<', else 0)
                        if _phonet_rules[pos] and (
                            '<' in _phonet_rules[pos][1:]
                        ):
                            priority0 = 1
                        else:
                            priority0 = 0

                        rule = _phonet_rules[pos + mode]

                        if (priority0 == 1) and (zeta == 0):
                            # rule with '<' is applied
                            # (the replacement is written back into src
                            # and position i is examined again)
                            if (
                                (j > 0)
                                and rule
                                and (
                                    (dest[j - 1] == char)
                                    or (dest[j - 1] == rule[0])
                                )
                            ):
                                j -= 1

                            zeta0 = 1
                            zeta += 1
                            matches0 = 0

                            # overwrite src with the replacement, one
                            # character at a time
                            while rule and src[i + matches0]:
                                src = (
                                    src[0 : i + matches0]
                                    + rule[0]
                                    + src[i + matches0 + 1 :]
                                )
                                matches0 += 1
                                rule = rule[1:]

                            # replacement shorter than the match: remove
                            # the remaining matched characters from src
                            if matches0 < matches:
                                src = (
                                    src[0 : i + matches0]
                                    + src[i + matches :]
                                )

                            char = src[i]
                        else:
                            # ordinary rule: skip the matched letters and
                            # emit the replacement into dest
                            i = i + matches - 1
                            zeta = 0

                            # write all but the last replacement
                            # character, never repeating the character
                            # written just before
                            while len(rule) > 1:
                                if (j == 0) or (dest[j - 1] != rule[0]):
                                    dest = (
                                        dest[0:j]
                                        + rule[0]
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                rule = rule[1:]

                            # new "current char"
                            # (the last replacement character; it is
                            # written further down)
                            if not rule:
                                rule = ''
                                char = ''
                            else:
                                char = rule[0]

                            # pattern contains "^^": write the pending
                            # character now, drop everything consumed so
                            # far from src and restart at its beginning
                            if (
                                _phonet_rules[pos]
                                and '^^' in _phonet_rules[pos][1:]
                            ):
                                if char:
                                    dest = (
                                        dest[0:j]
                                        + char
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                src = src[i + 1 :]
                                i = 0
                                zeta0 = 1

                        break

                    pos += 3

                    # first run exhausted: switch to the second run
                    if pos > end1 and start2 > 0:
                        pos = start2
                        start1 = start2
                        end1 = end2
                        start2 = -1
                        end2 = -1

            # unless a '<' or '^^' rule already handled this position,
            # write the current character and advance
            if zeta0 == 0:
                if char and ((j == 0) or (dest[j - 1] != char)):
                    # delete multiple letters only
                    dest = (
                        dest[0:j] + char + dest[min(j + 1, term_length) :]
                    )
                    j += 1

                i += 1
                zeta = 0

        # cut off whatever remains of the original term behind the
        # characters written so far
        dest = dest[0:j]

        return dest

    _initialize_phonet(lang)

    word = unicode_normalize('NFKC', text_type(word))
    return _phonet(word, mode, lang)
1723
1724
def phonet(word, mode=1, lang='de'):
    """Encode a word with the phonet algorithm.

    Convenience function that creates a :py:class:`Phonet` instance and
    delegates to :py:meth:`Phonet.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    mode : int
        The phonet variant to employ (1 or 2)
    lang : str
        ``de`` (default) for German, ``none`` for no language

    Returns
    -------
    str
        The phonet value

    Examples
    --------
    >>> phonet('Christopher')
    'KRISTOFA'
    >>> phonet('Niall')
    'NIAL'
    >>> phonet('Smith')
    'SMIT'
    >>> phonet('Schmidt')
    'SHMIT'

    >>> phonet('Christopher', mode=2)
    'KRIZTUFA'
    >>> phonet('Niall', mode=2)
    'NIAL'
    >>> phonet('Smith', mode=2)
    'ZNIT'
    >>> phonet('Schmidt', mode=2)
    'ZNIT'

    >>> phonet('Christopher', lang='none')
    'CHRISTOPHER'
    >>> phonet('Niall', lang='none')
    'NIAL'
    >>> phonet('Smith', lang='none')
    'SMITH'
    >>> phonet('Schmidt', lang='none')
    'SCHMIDT'

    """
    encoder = Phonet()
    return encoder.encode(word, mode, lang)
1775
1776
if __name__ == '__main__':
    # Run the doctest examples embedded in this module's docstrings when
    # the file is executed as a script.
    from doctest import testmod

    testmod()
1781

chrislit / abydos

Push — master ( f43547...71985b )

abydos.phonetic._phonet.Phonet.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like