abydos.phonetic._phonet.Phonet.encode() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-08 03:44 UTC

abydos.phonetic._phonet.Phonet.encode() F

↳ Parent: abydos.phonetic._phonet

Complexity

Conditions

142

Size

Total Lines	633
Code Lines	381

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	269
CRAP Score	142

Importance

Changes

Metric	Value
eloc	381
dl	0
loc	633
ccs	269
cts	269
cp	1
rs	0
c	0
b	0
f	0
cc	142
nop	4
crap	142

How to fix Long Method Complexity

1		# -- coding: utf-8 --
		Scrutinizer issue (Coding Style, introduced 2018-11-08 03:56 UTC; 0 ignored issues): Too many lines in module (1754/1000).
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._phonet.
20
21		The phonetic._phonet module implements the phonet algorithm (a.k.a.
22		Hannoveraner Phonetik), intended chiefly for German.
23		"""
24
25	1	from __future__ import unicode_literals
26
27	1	from collections import Counter
28	1	from unicodedata import normalize as unicode_normalize
29
30	1	from six import text_type
31	1	from six.moves import range
32
33	1	from ._phonetic import Phonetic
34
35	1	__all__ = ['Phonet', 'phonet']
36
37
38	1	class Phonet(Phonetic):
		Scrutinizer issue (Unused Code, introduced 2018-11-04 08:02 UTC; 0 ignored issues): The variable `__class__` seems to be unused.
39		"""Phonet code.
40
41		phonet ("Hannoveraner Phonetik") was developed by Jörg Michael and
42		documented in :cite:`Michael:1999`.
43
44		This is a port of Jesper Zedlitz's code, which is licensed LGPL
45		:cite:`Zedlitz:2015`.
46
47		That is, in turn, based on Michael's C code, which is also licensed LGPL
48		:cite:`Michael:2007`.
49		"""
50
51	1	_rules_no_lang = ( # separator chars
52		# fmt: off
53		'´', ' ', ' ',
54		'"', ' ', ' ',
55		'`$', '', '',
56		'\'', ' ', ' ',
57		',', ',', ',',
58		';', ',', ',',
59		'-', ' ', ' ',
60		' ', ' ', ' ',
61		'.', '.', '.',
62		':', '.', '.',
63		# German umlauts
64		'Ä', 'AE', 'AE',
65		'Ö', 'OE', 'OE',
66		'Ü', 'UE', 'UE',
67		'ß', 'S', 'S',
68		# international umlauts
69		'À', 'A', 'A',
70		'Á', 'A', 'A',
71		'Â', 'A', 'A',
72		'Ã', 'A', 'A',
73		'Å', 'A', 'A',
74		'Æ', 'AE', 'AE',
75		'Ç', 'C', 'C',
76		'Ð', 'DJ', 'DJ',
77		'È', 'E', 'E',
78		'É', 'E', 'E',
79		'Ê', 'E', 'E',
80		'Ë', 'E', 'E',
81		'Ì', 'I', 'I',
82		'Í', 'I', 'I',
83		'Î', 'I', 'I',
84		'Ï', 'I', 'I',
85		'Ñ', 'NH', 'NH',
86		'Ò', 'O', 'O',
87		'Ó', 'O', 'O',
88		'Ô', 'O', 'O',
89		'Õ', 'O', 'O',
90		'Œ', 'OE', 'OE',
91		'Ø', 'OE', 'OE',
92		'Š', 'SH', 'SH',
93		'Þ', 'TH', 'TH',
94		'Ù', 'U', 'U',
95		'Ú', 'U', 'U',
96		'Û', 'U', 'U',
97		'Ý', 'Y', 'Y',
98		'Ÿ', 'Y', 'Y',
99		# 'normal' letters (A-Z)
100		'MC^', 'MAC', 'MAC',
101		'MC^', 'MAC', 'MAC',
102		'M´^', 'MAC', 'MAC',
103		'M\'^', 'MAC', 'MAC',
104		'O´^', 'O', 'O',
105		'O\'^', 'O', 'O',
106		'VAN DEN ^', 'VANDEN', 'VANDEN',
107		None, None, None
108		# fmt: on
109		)
110
111	1	_rules_german = ( # separator chars
112		# fmt: off
113		'´', ' ', ' ',
114		'"', ' ', ' ',
115		'`$', '', '',
116		'\'', ' ', ' ',
117		',', ' ', ' ',
118		';', ' ', ' ',
119		'-', ' ', ' ',
120		' ', ' ', ' ',
121		'.', '.', '.',
122		':', '.', '.',
123		# German umlauts
124		'ÄE', 'E', 'E',
125		'ÄU<', 'EU', 'EU',
126		'ÄV(AEOU)-<', 'EW', None,
127		'Ä$', 'Ä', None,
128		'Ä<', None, 'E',
129		'Ä', 'E', None,
130		'ÖE', 'Ö', 'Ö',
131		'ÖU', 'Ö', 'Ö',
132		'ÖVER--<', 'ÖW', None,
133		'ÖV(AOU)-', 'ÖW', None,
134		'ÜBEL(GNRW)-^^', 'ÜBL ', 'IBL ',
135		'ÜBER^^', 'ÜBA', 'IBA',
136		'ÜE', 'Ü', 'I',
137		'ÜVER--<', 'ÜW', None,
138		'ÜV(AOU)-', 'ÜW', None,
139		'Ü', None, 'I',
140		'ßCH<', None, 'Z',
141		'ß<', 'S', 'Z',
142		# international umlauts
143		'À<', 'A', 'A',
144		'Á<', 'A', 'A',
145		'Â<', 'A', 'A',
146		'Ã<', 'A', 'A',
147		'Å<', 'A', 'A',
148		'ÆER-', 'E', 'E',
149		'ÆU<', 'EU', 'EU',
150		'ÆV(AEOU)-<', 'EW', None,
151		'Æ$', 'Ä', None,
152		'Æ<', None, 'E',
153		'Æ', 'E', None,
154		'Ç', 'Z', 'Z',
155		'ÐÐ-', '', '',
156		'Ð', 'DI', 'TI',
157		'È<', 'E', 'E',
158		'É<', 'E', 'E',
159		'Ê<', 'E', 'E',
160		'Ë', 'E', 'E',
161		'Ì<', 'I', 'I',
162		'Í<', 'I', 'I',
163		'Î<', 'I', 'I',
164		'Ï', 'I', 'I',
165		'ÑÑ-', '', '',
166		'Ñ', 'NI', 'NI',
167		'Ò<', 'O', 'U',
168		'Ó<', 'O', 'U',
169		'Ô<', 'O', 'U',
170		'Õ<', 'O', 'U',
171		'Œ<', 'Ö', 'Ö',
172		'Ø(IJY)-<', 'E', 'E',
173		'Ø<', 'Ö', 'Ö',
174		'Š', 'SH', 'Z',
175		'Þ', 'T', 'T',
176		'Ù<', 'U', 'U',
177		'Ú<', 'U', 'U',
178		'Û<', 'U', 'U',
179		'Ý<', 'I', 'I',
180		'Ÿ<', 'I', 'I',
181		# 'normal' letters (A-Z)
182		'ABELLE$', 'ABL', 'ABL',
183		'ABELL$', 'ABL', 'ABL',
184		'ABIENNE$', 'ABIN', 'ABIN',
185		'ACHME---^', 'ACH', 'AK',
186		'ACEY$', 'AZI', 'AZI',
187		'ADV', 'ATW', None,
188		'AEGL-', 'EK', None,
189		'AEU<', 'EU', 'EU',
190		'AE2', 'E', 'E',
191		'AFTRAUBEN------', 'AFT ', 'AFT ',
192		'AGL-1', 'AK', None,
193		'AGNI-^', 'AKN', 'AKN',
194		'AGNIE-', 'ANI', 'ANI',
195		'AGN(AEOU)-$', 'ANI', 'ANI',
196		'AH(AIOÖUÜY)-', 'AH', None,
197		'AIA2', 'AIA', 'AIA',
198		'AIE$', 'E', 'E',
199		'AILL(EOU)-', 'ALI', 'ALI',
200		'AINE$', 'EN', 'EN',
201		'AIRE$', 'ER', 'ER',
202		'AIR-', 'E', 'E',
203		'AISE$', 'ES', 'EZ',
204		'AISSANCE$', 'ESANS', 'EZANZ',
205		'AISSE$', 'ES', 'EZ',
206		'AIX$', 'EX', 'EX',
207		'AJ(AÄEÈÉÊIOÖUÜ)--', 'A', 'A',
208		'AKTIE', 'AXIE', 'AXIE',
209		'AKTUEL', 'AKTUEL', None,
210		'ALOI^', 'ALOI', 'ALUI', # Don't merge these rules
211		'ALOY^', 'ALOI', 'ALUI', # needed by 'check_rules'
212		'AMATEU(RS)-', 'AMATÖ', 'ANATÖ',
213		'ANCH(OEI)-', 'ANSH', 'ANZ',
214		'ANDERGEGANG----', 'ANDA GE', 'ANTA KE',
215		'ANDERGEHE----', 'ANDA ', 'ANTA ',
216		'ANDERGESETZ----', 'ANDA GE', 'ANTA KE',
217		'ANDERGING----', 'ANDA ', 'ANTA ',
218		'ANDERSETZ(ET)-----', 'ANDA ', 'ANTA ',
219		'ANDERZUGEHE----', 'ANDA ZU ', 'ANTA ZU ',
220		'ANDERZUSETZE-----', 'ANDA ZU ', 'ANTA ZU ',
221		'ANER(BKO)---^^', 'AN', None,
222		'ANHAND---^$', 'AN H', 'AN ',
223		'ANH(AÄEIOÖUÜY)--^^', 'AN', None,
224		'ANIELLE$', 'ANIEL', 'ANIL',
225		'ANIEL', 'ANIEL', None,
226		'ANSTELLE----^$', 'AN ST', 'AN ZT',
227		'ANTI^^', 'ANTI', 'ANTI',
228		'ANVER^^', 'ANFA', 'ANFA',
229		'ATIA$', 'ATIA', 'ATIA',
230		'ATIA(NS)--', 'ATI', 'ATI',
231		'ATI(AÄOÖUÜ)-', 'AZI', 'AZI',
232		'AUAU--', '', '',
233		'AUERE$', 'AUERE', None,
234		'AUERE(NS)-$', 'AUERE', None,
235		'AUERE(AIOUY)--', 'AUER', None,
236		'AUER(AÄIOÖUÜY)-', 'AUER', None,
237		'AUER<', 'AUA', 'AUA',
238		'AUF^^', 'AUF', 'AUF',
239		'AULT$', 'O', 'U',
240		'AUR(BCDFGKLMNQSTVWZ)-', 'AUA', 'AUA',
241		'AUR$', 'AUA', 'AUA',
242		'AUSSE$', 'OS', 'UZ',
243		'AUS(ST)-^', 'AUS', 'AUS',
244		'AUS^^', 'AUS', 'AUS',
245		'AUTOFAHR----', 'AUTO ', 'AUTU ',
246		'AUTO^^', 'AUTO', 'AUTU',
247		'AUX(IY)-', 'AUX', 'AUX',
248		'AUX', 'O', 'U',
249		'AU', 'AU', 'AU',
250		'AVER--<', 'AW', None,
251		'AVIER$', 'AWIE', 'AFIE',
252		'AV(EÈÉÊI)-^', 'AW', None,
253		'AV(AOU)-', 'AW', None,
254		'AYRE$', 'EIRE', 'EIRE',
255		'AYRE(NS)-$', 'EIRE', 'EIRE',
256		'AYRE(AIOUY)--', 'EIR', 'EIR',
257		'AYR(AÄIOÖUÜY)-', 'EIR', 'EIR',
258		'AYR<', 'EIA', 'EIA',
259		'AYER--<', 'EI', 'EI',
260		'AY(AÄEIOÖUÜY)--', 'A', 'A',
261		'AË', 'E', 'E',
262		'A(IJY)<', 'EI', 'EI',
263		'BABY^$', 'BEBI', 'BEBI',
264		'BAB(IY)^', 'BEBI', 'BEBI',
265		'BEAU^$', 'BO', None,
266		'BEA(BCMNRU)-^', 'BEA', 'BEA',
267		'BEAT(AEIMORU)-^', 'BEAT', 'BEAT',
268		'BEE$', 'BI', 'BI',
269		'BEIGE^$', 'BESH', 'BEZ',
270		'BENOIT--', 'BENO', 'BENU',
271		'BER(DT)-', 'BER', None,
272		'BERN(DT)-', 'BERN', None,
273		'BE(LMNRST)-^', 'BE', 'BE',
274		'BETTE$', 'BET', 'BET',
275		'BEVOR^$', 'BEFOR', None,
276		'BIC$', 'BIZ', 'BIZ',
277		'BOWL(EI)-', 'BOL', 'BUL',
278		'BP(AÄEÈÉÊIÌÍÎOÖRUÜY)-', 'B', 'B',
279		'BRINGEND-----^', 'BRI', 'BRI',
280		'BRINGEND-----', ' BRI', ' BRI',
281		'BROW(NS)-', 'BRAU', 'BRAU',
282		'BUDGET7', 'BÜGE', 'BIKE',
283		'BUFFET7', 'BÜFE', 'BIFE',
284		'BYLLE$', 'BILE', 'BILE',
285		'BYLL$', 'BIL', 'BIL',
286		'BYPA--^', 'BEI', 'BEI',
287		'BYTE<', 'BEIT', 'BEIT',
288		'BY9^', 'BÜ', None,
289		'B(SßZ)$', 'BS', None,
290		'CACH(EI)-^', 'KESH', 'KEZ',
291		'CAE--', 'Z', 'Z',
292		'CA(IY)$', 'ZEI', 'ZEI',
293		'CE(EIJUY)--', 'Z', 'Z',
294		'CENT<', 'ZENT', 'ZENT',
295		'CERST(EI)----^', 'KE', 'KE',
296		'CER$', 'ZA', 'ZA',
297		'CE3', 'ZE', 'ZE',
298		'CH\'S$', 'X', 'X',
299		'CH´S$', 'X', 'X',
300		'CHAO(ST)-', 'KAO', 'KAU',
301		'CHAMPIO-^', 'SHEMPI', 'ZENBI',
302		'CHAR(AI)-^', 'KAR', 'KAR',
303		'CHAU(CDFSVWXZ)-', 'SHO', 'ZU',
304		'CHÄ(CF)-', 'SHE', 'ZE',
305		'CHE(CF)-', 'SHE', 'ZE',
306		'CHEM-^', 'KE', 'KE', # or: 'CHE', 'KE'
307		'CHEQUE<', 'SHEK', 'ZEK',
308		'CHI(CFGPVW)-', 'SHI', 'ZI',
309		'CH(AEUY)-<^', 'SH', 'Z',
310		'CHK-', '', '',
311		'CHO(CKPS)-^', 'SHO', 'ZU',
312		'CHRIS-', 'KRI', None,
313		'CHRO-', 'KR', None,
314		'CH(LOR)-<^', 'K', 'K',
315		'CHST-', 'X', 'X',
316		'CH(SßXZ)3', 'X', 'X',
317		'CHTNI-3', 'CHN', 'KN',
318		'CH^', 'K', 'K', # or: 'CH', 'K'
319		'CH', 'CH', 'K',
320		'CIC$', 'ZIZ', 'ZIZ',
321		'CIENCEFICT----', 'EIENS ', 'EIENZ ',
322		'CIENCE$', 'EIENS', 'EIENZ',
323		'CIER$', 'ZIE', 'ZIE',
324		'CYB-^', 'ZEI', 'ZEI',
325		'CY9^', 'ZÜ', 'ZI',
326		'C(IJY)-<3', 'Z', 'Z',
327		'CLOWN-', 'KLAU', 'KLAU',
328		'CCH', 'Z', 'Z',
329		'CCE-', 'X', 'X',
330		'C(CK)-', '', '',
331		'CLAUDET---', 'KLO', 'KLU',
332		'CLAUDINE^$', 'KLODIN', 'KLUTIN',
333		'COACH', 'KOSH', 'KUZ',
334		'COLE$', 'KOL', 'KUL',
335		'COUCH', 'KAUSH', 'KAUZ',
336		'COW', 'KAU', 'KAU',
337		'CQUES$', 'K', 'K',
338		'CQUE', 'K', 'K',
339		'CRASH--9', 'KRE', 'KRE',
340		'CREAT-^', 'KREA', 'KREA',
341		'CST', 'XT', 'XT',
342		'CS<^', 'Z', 'Z',
343		'C(SßX)', 'X', 'X',
344		'CT\'S$', 'X', 'X',
345		'CT(SßXZ)', 'X', 'X',
346		'CZ<', 'Z', 'Z',
347		'C(ÈÉÊÌÍÎÝ)3', 'Z', 'Z',
348		'C.^', 'C.', 'C.',
349		'CÄ-', 'Z', 'Z',
350		'CÜ$', 'ZÜ', 'ZI',
351		'C\'S$', 'X', 'X',
352		'C<', 'K', 'K',
353		'DAHER^$', 'DAHER', None,
354		'DARAUFFOLGE-----', 'DARAUF ', 'TARAUF ',
355		'DAVO(NR)-^$', 'DAFO', 'TAFU',
356		'DD(SZ)--<', '', '',
357		'DD9', 'D', None,
358		'DEPOT7', 'DEPO', 'TEBU',
359		'DESIGN', 'DISEIN', 'TIZEIN',
360		'DE(LMNRST)-3^', 'DE', 'TE',
361		'DETTE$', 'DET', 'TET',
362		'DH$', 'T', None,
363		'DIC$', 'DIZ', 'TIZ',
364		'DIDR-^', 'DIT', None,
365		'DIEDR-^', 'DIT', None,
366		'DJ(AEIOU)-^', 'I', 'I',
367		'DMITR-^', 'DIMIT', 'TINIT',
368		'DRY9^', 'DRÜ', None,
369		'DT-', '', '',
370		'DUIS-^', 'DÜ', 'TI',
371		'DURCH^^', 'DURCH', 'TURK',
372		'DVA$', 'TWA', None,
373		'DY9^', 'DÜ', None,
374		'DYS$', 'DIS', None,
375		'DS(CH)--<', 'T', 'T',
376		'DST', 'ZT', 'ZT',
377		'DZS(CH)--', 'T', 'T',
378		'D(SßZ)', 'Z', 'Z',
379		'D(AÄEIOÖRUÜY)-', 'D', None,
380		'D(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'D', None,
381		'D\'H^', 'D', 'T',
382		'D´H^', 'D', 'T',
383		'D`H^', 'D', 'T',
384		'D\'S3$', 'Z', 'Z',
385		'D´S3$', 'Z', 'Z',
386		'D^', 'D', None,
387		'D', 'T', 'T',
388		'EAULT$', 'O', 'U',
389		'EAUX$', 'O', 'U',
390		'EAU', 'O', 'U',
391		'EAV', 'IW', 'IF',
392		'EAS3$', 'EAS', None,
393		'EA(AÄEIOÖÜY)-3', 'EA', 'EA',
394		'EA3$', 'EA', 'EA',
395		'EA3', 'I', 'I',
396		'EBENSO^$', 'EBNSO', 'EBNZU',
397		'EBENSO^^', 'EBNSO ', 'EBNZU ',
398		'EBEN^^', 'EBN', 'EBN',
399		'EE9', 'E', 'E',
400		'EGL-1', 'EK', None,
401		'EHE(IUY)--1', 'EH', None,
402		'EHUNG---1', 'E', None,
403		'EH(AÄIOÖUÜY)-1', 'EH', None,
404		'EIEI--', '', '',
405		'EIERE^$', 'EIERE', None,
406		'EIERE$', 'EIERE', None,
407		'EIERE(NS)-$', 'EIERE', None,
408		'EIERE(AIOUY)--', 'EIER', None,
409		'EIER(AÄIOÖUÜY)-', 'EIER', None,
410		'EIER<', 'EIA', None,
411		'EIGL-1', 'EIK', None,
412		'EIGH$', 'EI', 'EI',
413		'EIH--', 'E', 'E',
414		'EILLE$', 'EI', 'EI',
415		'EIR(BCDFGKLMNQSTVWZ)-', 'EIA', 'EIA',
416		'EIR$', 'EIA', 'EIA',
417		'EITRAUBEN------', 'EIT ', 'EIT ',
418		'EI', 'EI', 'EI',
419		'EJ$', 'EI', 'EI',
420		'ELIZ^', 'ELIS', None,
421		'ELZ^', 'ELS', None,
422		'EL-^', 'E', 'E',
423		'ELANG----1', 'E', 'E',
424		'EL(DKL)--1', 'E', 'E',
425		'EL(MNT)--1$', 'E', 'E',
426		'ELYNE$', 'ELINE', 'ELINE',
427		'ELYN$', 'ELIN', 'ELIN',
428		'EL(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'EL', 'EL',
429		'EL-1', 'L', 'L',
430		'EM-^', None, 'E',
431		'EM(DFKMPQT)--1', None, 'E',
432		'EM(AÄEÈÉÊIÌÍÎOÖUÜY)--1', None, 'E',
433		'EM-1', None, 'N',
434		'ENGAG-^', 'ANGA', 'ANKA',
435		'EN-^', 'E', 'E',
436		'ENTUEL', 'ENTUEL', None,
437		'EN(CDGKQSTZ)--1', 'E', 'E',
438		'EN(AÄEÈÉÊIÌÍÎNOÖUÜY)-1', 'EN', 'EN',
439		'EN-1', '', '',
440		'ERH(AÄEIOÖUÜ)-^', 'ERH', 'ER',
441		'ER-^', 'E', 'E',
442		'ERREGEND-----', ' ER', ' ER',
443		'ERT1$', 'AT', None,
444		'ER(DGLKMNRQTZß)-1', 'ER', None,
445		'ER(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'ER', 'A',
446		'ER1$', 'A', 'A',
447		'ER<1', 'A', 'A',
448		'ETAT7', 'ETA', 'ETA',
449		'ETI(AÄOÖÜU)-', 'EZI', 'EZI',
450		'EUERE$', 'EUERE', None,
451		'EUERE(NS)-$', 'EUERE', None,
452		'EUERE(AIOUY)--', 'EUER', None,
453		'EUER(AÄIOÖUÜY)-', 'EUER', None,
454		'EUER<', 'EUA', None,
455		'EUEU--', '', '',
456		'EUILLE$', 'Ö', 'Ö',
457		'EUR$', 'ÖR', 'ÖR',
458		'EUX', 'Ö', 'Ö',
459		'EUSZ$', 'EUS', None,
460		'EUTZ$', 'EUS', None,
461		'EUYS$', 'EUS', 'EUZ',
462		'EUZ$', 'EUS', None,
463		'EU', 'EU', 'EU',
464		'EVER--<1', 'EW', None,
465		'EV(ÄOÖUÜ)-1', 'EW', None,
466		'EYER<', 'EIA', 'EIA',
467		'EY<', 'EI', 'EI',
468		'FACETTE', 'FASET', 'FAZET',
469		'FANS--^$', 'FE', 'FE',
470		'FAN-^$', 'FE', 'FE',
471		'FAULT-', 'FOL', 'FUL',
472		'FEE(DL)-', 'FI', 'FI',
473		'FEHLER', 'FELA', 'FELA',
474		'FE(LMNRST)-3^', 'FE', 'FE',
475		'FOERDERN---^', 'FÖRD', 'FÖRT',
476		'FOERDERN---', ' FÖRD', ' FÖRT',
477		'FOND7', 'FON', 'FUN',
478		'FRAIN$', 'FRA', 'FRA',
479		'FRISEU(RS)-', 'FRISÖ', 'FRIZÖ',
480		'FY9^', 'FÜ', None,
481		'FÖRDERN---^', 'FÖRD', 'FÖRT',
482		'FÖRDERN---', ' FÖRD', ' FÖRT',
483		'GAGS^$', 'GEX', 'KEX',
484		'GAG^$', 'GEK', 'KEK',
485		'GD', 'KT', 'KT',
486		'GEGEN^^', 'GEGN', 'KEKN',
487		'GEGENGEKOM-----', 'GEGN ', 'KEKN ',
488		'GEGENGESET-----', 'GEGN ', 'KEKN ',
489		'GEGENKOMME-----', 'GEGN ', 'KEKN ',
490		'GEGENZUKOM---', 'GEGN ZU ', 'KEKN ZU ',
491		'GENDETWAS-----$', 'GENT ', 'KENT ',
492		'GENRE', 'IORE', 'IURE',
493		'GE(LMNRST)-3^', 'GE', 'KE',
494		'GER(DKT)-', 'GER', None,
495		'GETTE$', 'GET', 'KET',
496		'GGF.', 'GF.', None,
497		'GG-', '', '',
498		'GH', 'G', None,
499		'GI(AOU)-^', 'I', 'I',
500		'GION-3', 'KIO', 'KIU',
501		'G(CK)-', '', '',
502		'GJ(AEIOU)-^', 'I', 'I',
503		'GMBH^$', 'GMBH', 'GMBH',
504		'GNAC$', 'NIAK', 'NIAK',
505		'GNON$', 'NION', 'NIUN',
506		'GN$', 'N', 'N',
507		'GONCAL-^', 'GONZA', 'KUNZA',
508		'GRY9^', 'GRÜ', None,
509		'G(SßXZ)-<', 'K', 'K',
510		'GUCK-', 'KU', 'KU',
511		'GUISEP-^', 'IUSE', 'IUZE',
512		'GUI-^', 'G', 'K',
513		'GUTAUSSEH------^', 'GUT ', 'KUT ',
514		'GUTGEHEND------^', 'GUT ', 'KUT ',
515		'GY9^', 'GÜ', None,
516		'G(AÄEILOÖRUÜY)-', 'G', None,
517		'G(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'G', None,
518		'G\'S$', 'X', 'X',
519		'G´S$', 'X', 'X',
520		'G^', 'G', None,
521		'G', 'K', 'K',
522		'HA(HIUY)--1', 'H', None,
523		'HANDVOL---^', 'HANT ', 'ANT ',
524		'HANNOVE-^', 'HANOF', None,
525		'HAVEN7$', 'HAFN', None,
526		'HEAD-', 'HE', 'E',
527		'HELIEGEN------', 'E ', 'E ',
528		'HESTEHEN------', 'E ', 'E ',
529		'HE(LMNRST)-3^', 'HE', 'E',
530		'HE(LMN)-1', 'E', 'E',
531		'HEUR1$', 'ÖR', 'ÖR',
532		'HE(HIUY)--1', 'H', None,
533		'HIH(AÄEIOÖUÜY)-1', 'IH', None,
534		'HLH(AÄEIOÖUÜY)-1', 'LH', None,
535		'HMH(AÄEIOÖUÜY)-1', 'MH', None,
536		'HNH(AÄEIOÖUÜY)-1', 'NH', None,
537		'HOBBY9^', 'HOBI', None,
538		'HOCHBEGAB-----^', 'HOCH ', 'UK ',
539		'HOCHTALEN-----^', 'HOCH ', 'UK ',
540		'HOCHZUFRI-----^', 'HOCH ', 'UK ',
541		'HO(HIY)--1', 'H', None,
542		'HRH(AÄEIOÖUÜY)-1', 'RH', None,
543		'HUH(AÄEIOÖUÜY)-1', 'UH', None,
544		'HUIS^^', 'HÜS', 'IZ',
545		'HUIS$', 'ÜS', 'IZ',
546		'HUI--1', 'H', None,
547		'HYGIEN^', 'HÜKIEN', None,
548		'HY9^', 'HÜ', None,
549		'HY(BDGMNPST)-', 'Ü', None,
550		'H.^', None, 'H.',
551		'HÄU--1', 'H', None,
552		'H^', 'H', '',
553		'H', '', '',
554		'ICHELL---', 'ISH', 'IZ',
555		'ICHI$', 'ISHI', 'IZI',
556		'IEC$', 'IZ', 'IZ',
557		'IEDENSTELLE------', 'IDN ', 'ITN ',
558		'IEI-3', '', '',
559		'IELL3', 'IEL', 'IEL',
560		'IENNE$', 'IN', 'IN',
561		'IERRE$', 'IER', 'IER',
562		'IERZULAN---', 'IR ZU ', 'IR ZU ',
563		'IETTE$', 'IT', 'IT',
564		'IEU', 'IÖ', 'IÖ',
565		'IE<4', 'I', 'I',
566		'IGL-1', 'IK', None,
567		'IGHT3$', 'EIT', 'EIT',
568		'IGNI(EO)-', 'INI', 'INI',
569		'IGN(AEOU)-$', 'INI', 'INI',
570		'IHER(DGLKRT)--1', 'IHE', None,
571		'IHE(IUY)--', 'IH', None,
572		'IH(AIOÖUÜY)-', 'IH', None,
573		'IJ(AOU)-', 'I', 'I',
574		'IJ$', 'I', 'I',
575		'IJ<', 'EI', 'EI',
576		'IKOLE$', 'IKOL', 'IKUL',
577		'ILLAN(STZ)--4', 'ILIA', 'ILIA',
578		'ILLAR(DT)--4', 'ILIA', 'ILIA',
579		'IMSTAN----^', 'IM ', 'IN ',
580		'INDELERREGE------', 'INDL ', 'INTL ',
581		'INFRAGE-----^$', 'IN ', 'IN ',
582		'INTERN(AOU)-^', 'INTAN', 'INTAN',
583		'INVER-', 'INWE', 'INFE',
584		'ITI(AÄIOÖUÜ)-', 'IZI', 'IZI',
585		'IUSZ$', 'IUS', None,
586		'IUTZ$', 'IUS', None,
587		'IUZ$', 'IUS', None,
588		'IVER--<', 'IW', None,
589		'IVIER$', 'IWIE', 'IFIE',
590		'IV(ÄOÖUÜ)-', 'IW', None,
591		'IV<3', 'IW', None,
592		'IY2', 'I', None,
593		'I(ÈÉÊ)<4', 'I', 'I',
594		'JAVIE---<^', 'ZA', 'ZA',
595		'JEANS^$', 'JINS', 'INZ',
596		'JEANNE^$', 'IAN', 'IAN',
597		'JEAN-^', 'IA', 'IA',
598		'JER-^', 'IE', 'IE',
599		'JE(LMNST)-', 'IE', 'IE',
600		'JI^', 'JI', None,
601		'JOR(GK)^$', 'IÖRK', 'IÖRK',
602		'J', 'I', 'I',
603		'KC(ÄEIJ)-', 'X', 'X',
604		'KD', 'KT', None,
605		'KE(LMNRST)-3^', 'KE', 'KE',
606		'KG(AÄEILOÖRUÜY)-', 'K', None,
607		'KH<^', 'K', 'K',
608		'KIC$', 'KIZ', 'KIZ',
609		'KLE(LMNRST)-3^', 'KLE', 'KLE',
610		'KOTELE-^', 'KOTL', 'KUTL',
611		'KREAT-^', 'KREA', 'KREA',
612		'KRÜS(TZ)--^', 'KRI', None,
613		'KRYS(TZ)--^', 'KRI', None,
614		'KRY9^', 'KRÜ', None,
615		'KSCH---', 'K', 'K',
616		'KSH--', 'K', 'K',
617		'K(SßXZ)7', 'X', 'X', # implies 'KST' -> 'XT'
618		'KT\'S$', 'X', 'X',
619		'KTI(AIOU)-3', 'XI', 'XI',
620		'KT(SßXZ)', 'X', 'X',
621		'KY9^', 'KÜ', None,
622		'K\'S$', 'X', 'X',
623		'K´S$', 'X', 'X',
624		'LANGES$', ' LANGES', ' LANKEZ',
625		'LANGE$', ' LANGE', ' LANKE',
626		'LANG$', ' LANK', ' LANK',
627		'LARVE-', 'LARF', 'LARF',
628		'LD(SßZ)$', 'LS', 'LZ',
629		'LD\'S$', 'LS', 'LZ',
630		'LD´S$', 'LS', 'LZ',
631		'LEAND-^', 'LEAN', 'LEAN',
632		'LEERSTEHE-----^', 'LER ', 'LER ',
633		'LEICHBLEIB-----', 'LEICH ', 'LEIK ',
634		'LEICHLAUTE-----', 'LEICH ', 'LEIK ',
635		'LEIDERREGE------', 'LEIT ', 'LEIT ',
636		'LEIDGEPR----^', 'LEIT ', 'LEIT ',
637		'LEINSTEHE-----', 'LEIN ', 'LEIN ',
638		'LEL-', 'LE', 'LE',
639		'LE(MNRST)-3^', 'LE', 'LE',
640		'LETTE$', 'LET', 'LET',
641		'LFGNAG-', 'LFGAN', 'LFKAN',
642		'LICHERWEIS----', 'LICHA ', 'LIKA ',
643		'LIC$', 'LIZ', 'LIZ',
644		'LIVE^$', 'LEIF', 'LEIF',
645		'LT(SßZ)$', 'LS', 'LZ',
646		'LT\'S$', 'LS', 'LZ',
647		'LT´S$', 'LS', 'LZ',
648		'LUI(GS)--', 'LU', 'LU',
649		'LV(AIO)-', 'LW', None,
650		'LY9^', 'LÜ', None,
651		'LSTS$', 'LS', 'LZ',
652		'LZ(BDFGKLMNPQRSTVWX)-', 'LS', None,
653		'L(SßZ)$', 'LS', None,
654		'MAIR-<', 'MEI', 'NEI',
655		'MANAG-', 'MENE', 'NENE',
656		'MANUEL', 'MANUEL', None,
657		'MASSEU(RS)-', 'MASÖ', 'NAZÖ',
658		'MATCH', 'MESH', 'NEZ',
659		'MAURICE', 'MORIS', 'NURIZ',
660		'MBH^$', 'MBH', 'MBH',
661		'MB(ßZ)$', 'MS', None,
662		'MB(SßTZ)-', 'M', 'N',
663		'MCG9^', 'MAK', 'NAK',
664		'MC9^', 'MAK', 'NAK',
665		'MEMOIR-^', 'MEMOA', 'NENUA',
666		'MERHAVEN$', 'MAHAFN', None,
667		'ME(LMNRST)-3^', 'ME', 'NE',
668		'MEN(STZ)--3', 'ME', None,
669		'MEN$', 'MEN', None,
670		'MIGUEL-', 'MIGE', 'NIKE',
671		'MIKE^$', 'MEIK', 'NEIK',
672		'MITHILFE----^$', 'MIT H', 'NIT ',
673		'MN$', 'M', None,
674		'MN', 'N', 'N',
675		'MPJUTE-', 'MPUT', 'NBUT',
676		'MP(ßZ)$', 'MS', None,
677		'MP(SßTZ)-', 'M', 'N',
678		'MP(BDJLMNPQVW)-', 'MB', 'NB',
679		'MY9^', 'MÜ', None,
680		'M(ßZ)$', 'MS', None,
681		'M´G7^', 'MAK', 'NAK',
682		'M\'G7^', 'MAK', 'NAK',
683		'M´^', 'MAK', 'NAK',
684		'M\'^', 'MAK', 'NAK',
685		'M', None, 'N',
686		'NACH^^', 'NACH', 'NAK',
687		'NADINE', 'NADIN', 'NATIN',
688		'NAIV--', 'NA', 'NA',
689		'NAISE$', 'NESE', 'NEZE',
690		'NAUGENOMM------', 'NAU ', 'NAU ',
691		'NAUSOGUT$', 'NAUSO GUT', 'NAUZU KUT',
692		'NCH$', 'NSH', 'NZ',
693		'NCOISE$', 'SOA', 'ZUA',
694		'NCOIS$', 'SOA', 'ZUA',
695		'NDAR$', 'NDA', 'NTA',
696		'NDERINGEN------', 'NDE ', 'NTE ',
697		'NDRO(CDKTZ)-', 'NTRO', None,
698		'ND(BFGJLMNPQVW)-', 'NT', None,
699		'ND(SßZ)$', 'NS', 'NZ',
700		'ND\'S$', 'NS', 'NZ',
701		'ND´S$', 'NS', 'NZ',
702		'NEBEN^^', 'NEBN', 'NEBN',
703		'NENGELERN------', 'NEN ', 'NEN ',
704		'NENLERN(ET)---', 'NEN LE', 'NEN LE',
705		'NENZULERNE---', 'NEN ZU LE', 'NEN ZU LE',
706		'NE(LMNRST)-3^', 'NE', 'NE',
707		'NEN-3', 'NE', 'NE',
708		'NETTE$', 'NET', 'NET',
709		'NGU^^', 'NU', 'NU',
710		'NG(BDFJLMNPQRTVW)-', 'NK', 'NK',
711		'NH(AUO)-$', 'NI', 'NI',
712		'NICHTSAHNEN-----', 'NIX ', 'NIX ',
713		'NICHTSSAGE----', 'NIX ', 'NIX ',
714		'NICHTS^^', 'NIX', 'NIX',
715		'NICHT^^', 'NICHT', 'NIKT',
716		'NINE$', 'NIN', 'NIN',
717		'NON^^', 'NON', 'NUN',
718		'NOTLEIDE-----^', 'NOT ', 'NUT ',
719		'NOT^^', 'NOT', 'NUT',
720		'NTI(AIOU)-3', 'NZI', 'NZI',
721		'NTIEL--3', 'NZI', 'NZI',
722		'NT(SßZ)$', 'NS', 'NZ',
723		'NT\'S$', 'NS', 'NZ',
724		'NT´S$', 'NS', 'NZ',
725		'NYLON', 'NEILON', 'NEILUN',
726		'NY9^', 'NÜ', None,
727		'NSTZUNEH---', 'NST ZU ', 'NZT ZU ',
728		'NSZ-', 'NS', None,
729		'NSTS$', 'NS', 'NZ',
730		'NZ(BDFGKLMNPQRSTVWX)-', 'NS', None,
731		'N(SßZ)$', 'NS', None,
732		'OBERE-', 'OBER', None,
733		'OBER^^', 'OBA', 'UBA',
734		'OEU2', 'Ö', 'Ö',
735		'OE<2', 'Ö', 'Ö',
736		'OGL-', 'OK', None,
737		'OGNIE-', 'ONI', 'UNI',
738		'OGN(AEOU)-$', 'ONI', 'UNI',
739		'OH(AIOÖUÜY)-', 'OH', None,
740		'OIE$', 'Ö', 'Ö',
741		'OIRE$', 'OA', 'UA',
742		'OIR$', 'OA', 'UA',
743		'OIX', 'OA', 'UA',
744		'OI<3', 'EU', 'EU',
745		'OKAY^$', 'OKE', 'UKE',
746		'OLYN$', 'OLIN', 'ULIN',
747		'OO(DLMZ)-', 'U', None,
748		'OO$', 'U', None,
749		'OO-', '', '',
750		'ORGINAL-----', 'ORI', 'URI',
751		'OTI(AÄOÖUÜ)-', 'OZI', 'UZI',
752		'OUI^', 'WI', 'FI',
753		'OUILLE$', 'ULIE', 'ULIE',
754		'OU(DT)-^', 'AU', 'AU',
755		'OUSE$', 'AUS', 'AUZ',
756		'OUT-', 'AU', 'AU',
757		'OU', 'U', 'U',
758		'O(FV)$', 'AU', 'AU', # due to 'OW$' -> 'AU'
759		'OVER--<', 'OW', None,
760		'OV(AOU)-', 'OW', None,
761		'OW$', 'AU', 'AU',
762		'OWS$', 'OS', 'UZ',
763		'OJ(AÄEIOÖUÜ)--', 'O', 'U',
764		'OYER', 'OIA', None,
765		'OY(AÄEIOÖUÜ)--', 'O', 'U',
766		'O(JY)<', 'EU', 'EU',
767		'OZ$', 'OS', None,
768		'O´^', 'O', 'U',
769		'O\'^', 'O', 'U',
770		'O', None, 'U',
771		'PATIEN--^', 'PAZI', 'PAZI',
772		'PENSIO-^', 'PANSI', 'PANZI',
773		'PE(LMNRST)-3^', 'PE', 'PE',
774		'PFER-^', 'FE', 'FE',
775		'P(FH)<', 'F', 'F',
776		'PIC^$', 'PIK', 'PIK',
777		'PIC$', 'PIZ', 'PIZ',
778		'PIPELINE', 'PEIBLEIN', 'PEIBLEIN',
779		'POLYP-', 'POLÜ', None,
780		'POLY^^', 'POLI', 'PULI',
781		'PORTRAIT7', 'PORTRE', 'PURTRE',
782		'POWER7', 'PAUA', 'PAUA',
783		'PP(FH)--<', 'B', 'B',
784		'PP-', '', '',
785		'PRODUZ-^', 'PRODU', 'BRUTU',
786		'PRODUZI--', ' PRODU', ' BRUTU',
787		'PRIX^$', 'PRI', 'PRI',
788		'PS-^^', 'P', None,
789		'P(SßZ)^', None, 'Z',
790		'P(SßZ)$', 'BS', None,
791		'PT-^', '', '',
792		'PTI(AÄOÖUÜ)-3', 'BZI', 'BZI',
793		'PY9^', 'PÜ', None,
794		'P(AÄEIOÖRUÜY)-', 'P', 'P',
795		'P(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'P', None,
796		'P.^', None, 'P.',
797		'P^', 'P', None,
798		'P', 'B', 'B',
799		'QI-', 'Z', 'Z',
800		'QUARANT--', 'KARA', 'KARA',
801		'QUE(LMNRST)-3', 'KWE', 'KFE',
802		'QUE$', 'K', 'K',
803		'QUI(NS)$', 'KI', 'KI',
804		'QUIZ7', 'KWIS', None,
805		'Q(UV)7', 'KW', 'KF',
806		'Q<', 'K', 'K',
807		'RADFAHR----', 'RAT ', 'RAT ',
808		'RAEFTEZEHRE-----', 'REFTE ', 'REFTE ',
809		'RCH', 'RCH', 'RK',
810		'REA(DU)---3^', 'R', None,
811		'REBSERZEUG------', 'REBS ', 'REBZ ',
812		'RECHERCH^', 'RESHASH', 'REZAZ',
813		'RECYCL--', 'RIZEI', 'RIZEI',
814		'RE(ALST)-3^', 'RE', None,
815		'REE$', 'RI', 'RI',
816		'RER$', 'RA', 'RA',
817		'RE(MNR)-4', 'RE', 'RE',
818		'RETTE$', 'RET', 'RET',
819		'REUZ$', 'REUZ', None,
820		'REW$', 'RU', 'RU',
821		'RH<^', 'R', 'R',
822		'RJA(MN)--', 'RI', 'RI',
823		'ROWD-^', 'RAU', 'RAU',
824		'RTEMONNAIE-', 'RTMON', 'RTNUN',
825		'RTI(AÄOÖUÜ)-3', 'RZI', 'RZI',
826		'RTIEL--3', 'RZI', 'RZI',
827		'RV(AEOU)-3', 'RW', None,
828		'RY(KN)-$', 'RI', 'RI',
829		'RY9^', 'RÜ', None,
830		'RÄFTEZEHRE-----', 'REFTE ', 'REFTE ',
831		'SAISO-^', 'SES', 'ZEZ',
832		'SAFE^$', 'SEIF', 'ZEIF',
833		'SAUCE-^', 'SOS', 'ZUZ',
834		'SCHLAGGEBEN-----<', 'SHLAK ', 'ZLAK ',
835		'SCHSCH---7', '', '',
836		'SCHTSCH', 'SH', 'Z',
837		'SC(HZ)<', 'SH', 'Z',
838		'SC', 'SK', 'ZK',
839		'SELBSTST--7^^', 'SELB', 'ZELB',
840		'SELBST7^^', 'SELBST', 'ZELBZT',
841		'SERVICE7^', 'SÖRWIS', 'ZÖRFIZ',
842		'SERVI-^', 'SERW', None,
843		'SE(LMNRST)-3^', 'SE', 'ZE',
844		'SETTE$', 'SET', 'ZET',
845		'SHP-^', 'S', 'Z',
846		'SHST', 'SHT', 'ZT',
847		'SHTSH', 'SH', 'Z',
848		'SHT', 'ST', 'Z',
849		'SHY9^', 'SHÜ', None,
850		'SH^^', 'SH', None,
851		'SH3', 'SH', 'Z',
852		'SICHERGEGAN-----^', 'SICHA ', 'ZIKA ',
853		'SICHERGEHE----^', 'SICHA ', 'ZIKA ',
854		'SICHERGESTEL------^', 'SICHA ', 'ZIKA ',
855		'SICHERSTELL-----^', 'SICHA ', 'ZIKA ',
856		'SICHERZU(GS)--^', 'SICHA ZU ', 'ZIKA ZU ',
857		'SIEGLI-^', 'SIKL', 'ZIKL',
858		'SIGLI-^', 'SIKL', 'ZIKL',
859		'SIGHT', 'SEIT', 'ZEIT',
860		'SIGN', 'SEIN', 'ZEIN',
861		'SKI(NPZ)-', 'SKI', 'ZKI',
862		'SKI<^', 'SHI', 'ZI',
863		'SODASS^$', 'SO DAS', 'ZU TAZ',
864		'SODAß^$', 'SO DAS', 'ZU TAZ',
865		'SOGENAN--^', 'SO GEN', 'ZU KEN',
866		'SOUND-', 'SAUN', 'ZAUN',
867		'STAATS^^', 'STAZ', 'ZTAZ',
868		'STADT^^', 'STAT', 'ZTAT',
869		'STANDE$', ' STANDE', ' ZTANTE',
870		'START^^', 'START', 'ZTART',
871		'STAURANT7', 'STORAN', 'ZTURAN',
872		'STEAK-', 'STE', 'ZTE',
873		'STEPHEN-^$', 'STEW', None,
874		'STERN', 'STERN', None,
875		'STRAF^^', 'STRAF', 'ZTRAF',
876		'ST\'S$', 'Z', 'Z',
877		'ST´S$', 'Z', 'Z',
878		'STST--', '', '',
879		'STS(ACEÈÉÊHIÌÍÎOUÄÜÖ)--', 'ST', 'ZT',
880		'ST(SZ)', 'Z', 'Z',
881		'SPAREN---^', 'SPA', 'ZPA',
882		'SPAREND----', ' SPA', ' ZPA',
883		'S(PTW)-^^', 'S', None,
884		'SP', 'SP', None,
885		'STYN(AE)-$', 'STIN', 'ZTIN',
886		'ST', 'ST', 'ZT',
887		'SUITE<', 'SIUT', 'ZIUT',
888		'SUKE--$', 'S', 'Z',
889		'SURF(EI)-', 'SÖRF', 'ZÖRF',
890		'SV(AEÈÉÊIÌÍÎOU)-<^', 'SW', None,
891		'SYB(IY)--^', 'SIB', None,
892		'SYL(KVW)--^', 'SI', None,
893		'SY9^', 'SÜ', None,
894		'SZE(NPT)-^', 'ZE', 'ZE',
895		'SZI(ELN)-^', 'ZI', 'ZI',
896		'SZCZ<', 'SH', 'Z',
897		'SZT<', 'ST', 'ZT',
898		'SZ<3', 'SH', 'Z',
899		'SÜL(KVW)--^', 'SI', None,
900		'S', None, 'Z',
901		'TCH', 'SH', 'Z',
902		'TD(AÄEIOÖRUÜY)-', 'T', None,
903		'TD(ÀÁÂÃÅÈÉÊËÌÍÎÏÒÓÔÕØÙÚÛÝŸ)-', 'T', None,
904		'TEAT-^', 'TEA', 'TEA',
905		'TERRAI7^', 'TERA', 'TERA',
906		'TE(LMNRST)-3^', 'TE', 'TE',
907		'TH<', 'T', 'T',
908		'TICHT-', 'TIK', 'TIK',
909		'TICH$', 'TIK', 'TIK',
910		'TIC$', 'TIZ', 'TIZ',
911		'TIGGESTELL-------', 'TIK ', 'TIK ',
912		'TIGSTELL-----', 'TIK ', 'TIK ',
913		'TOAS-^', 'TO', 'TU',
914		'TOILET-', 'TOLE', 'TULE',
915		'TOIN-', 'TOA', 'TUA',
916		'TRAECHTI-^', 'TRECHT', 'TREKT',
917		'TRAECHTIG--', ' TRECHT', ' TREKT',
918		'TRAINI-', 'TREN', 'TREN',
919		'TRÄCHTI-^', 'TRECHT', 'TREKT',
920		'TRÄCHTIG--', ' TRECHT', ' TREKT',
921		'TSCH', 'SH', 'Z',
922		'TSH', 'SH', 'Z',
923		'TST', 'ZT', 'ZT',
924		'T(Sß)', 'Z', 'Z',
925		'TT(SZ)--<', '', '',
926		'TT9', 'T', 'T',
927		'TV^$', 'TV', 'TV',
928		'TX(AEIOU)-3', 'SH', 'Z',
929		'TY9^', 'TÜ', None,
930		'TZ-', '', '',
931		'T\'S3$', 'Z', 'Z',
932		'T´S3$', 'Z', 'Z',
933		'UEBEL(GNRW)-^^', 'ÜBL ', 'IBL ',
934		'UEBER^^', 'ÜBA', 'IBA',
935		'UE2', 'Ü', 'I',
936		'UGL-', 'UK', None,
937		'UH(AOÖUÜY)-', 'UH', None,
938		'UIE$', 'Ü', 'I',
939		'UM^^', 'UM', 'UN',
940		'UNTERE--3', 'UNTE', 'UNTE',
941		'UNTER^^', 'UNTA', 'UNTA',
942		'UNVER^^', 'UNFA', 'UNFA',
943		'UN^^', 'UN', 'UN',
944		'UTI(AÄOÖUÜ)-', 'UZI', 'UZI',
945		'UVE-4', 'UW', None,
946		'UY2', 'UI', None,
947		'UZZ', 'AS', 'AZ',
948		'VACL-^', 'WAZ', 'FAZ',
949		'VAC$', 'WAZ', 'FAZ',
950		'VAN DEN ^', 'FANDN', 'FANTN',
951		'VANES-^', 'WANE', None,
952		'VATRO-', 'WATR', None,
953		'VA(DHJNT)--^', 'F', None,
954		'VEDD-^', 'FE', 'FE',
955		'VE(BEHIU)--^', 'F', None,
956		'VEL(BDLMNT)-^', 'FEL', None,
957		'VENTZ-^', 'FEN', None,
958		'VEN(NRSZ)-^', 'FEN', None,
959		'VER(AB)-^$', 'WER', None,
960		'VERBAL^$', 'WERBAL', None,
961		'VERBAL(EINS)-^', 'WERBAL', None,
962		'VERTEBR--', 'WERTE', None,
963		'VEREIN-----', 'F', None,
964		'VEREN(AEIOU)-^', 'WEREN', None,
965		'VERIFI', 'WERIFI', None,
966		'VERON(AEIOU)-^', 'WERON', None,
967		'VERSEN^', 'FERSN', 'FAZN',
968		'VERSIERT--^', 'WERSI', None,
969		'VERSIO--^', 'WERS', None,
970		'VERSUS', 'WERSUS', None,
971		'VERTI(GK)-', 'WERTI', None,
972		'VER^^', 'FER', 'FA',
973		'VERSPRECHE-------', ' FER', ' FA',
974		'VER$', 'WA', None,
975		'VER', 'FA', 'FA',
976		'VET(HT)-^', 'FET', 'FET',
977		'VETTE$', 'WET', 'FET',
978		'VE^', 'WE', None,
979		'VIC$', 'WIZ', 'FIZ',
980		'VIELSAGE----', 'FIL ', 'FIL ',
981		'VIEL', 'FIL', 'FIL',
982		'VIEW', 'WIU', 'FIU',
983		'VILL(AE)-', 'WIL', None,
984		'VIS(ACEIKUVWZ)-<^', 'WIS', None,
985		'VI(ELS)--^', 'F', None,
986		'VILLON--', 'WILI', 'FILI',
987		'VIZE^^', 'FIZE', 'FIZE',
988		'VLIE--^', 'FL', None,
989		'VL(AEIOU)--', 'W', None,
990		'VOKA-^', 'WOK', None,
991		'VOL(ATUVW)--^', 'WO', None,
992		'VOR^^', 'FOR', 'FUR',
993		'VR(AEIOU)--', 'W', None,
994		'VV9', 'W', None,
995		'VY9^', 'WÜ', 'FI',
996		'V(ÜY)-', 'W', None,
997		'V(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'W', None,
998		'V(AEIJLRU)-<', 'W', None,
999		'V.^', 'V.', None,
1000		'V<', 'F', 'F',
1001		'WEITERENTWI-----^', 'WEITA ', 'FEITA ',
1002		'WEITREICH-----^', 'WEIT ', 'FEIT ',
1003		'WEITVER^', 'WEIT FER', 'FEIT FA',
1004		'WE(LMNRST)-3^', 'WE', 'FE',
1005		'WER(DST)-', 'WER', None,
1006		'WIC$', 'WIZ', 'FIZ',
1007		'WIEDERU--', 'WIDE', 'FITE',
1008		'WIEDER^$', 'WIDA', 'FITA',
1009		'WIEDER^^', 'WIDA ', 'FITA ',
1010		'WIEVIEL', 'WI FIL', 'FI FIL',
1011		'WISUEL', 'WISUEL', None,
1012		'WR-^', 'W', None,
1013		'WY9^', 'WÜ', 'FI',
1014		'W(BDFGJKLMNPQRSTZ)-', 'F', None,
1015		'W$', 'F', None,
1016		'W', None, 'F',
1017		'X<^', 'Z', 'Z',
1018		'XHAVEN$', 'XAFN', None,
1019		'X(CSZ)', 'X', 'X',
1020		'XTS(CH)--', 'XT', 'XT',
1021		'XT(SZ)', 'Z', 'Z',
1022		'YE(LMNRST)-3^', 'IE', 'IE',
1023		'YE-3', 'I', 'I',
1024		'YOR(GK)^$', 'IÖRK', 'IÖRK',
1025		'Y(AOU)-<7', 'I', 'I',
1026		'Y(BKLMNPRSTX)-1', 'Ü', None,
1027		'YVES^$', 'IF', 'IF',
1028		'YVONNE^$', 'IWON', 'IFUN',
1029		'Y.^', 'Y.', None,
1030		'Y', 'I', 'I',
1031		'ZC(AOU)-', 'SK', 'ZK',
1032		'ZE(LMNRST)-3^', 'ZE', 'ZE',
1033		'ZIEJ$', 'ZI', 'ZI',
1034		'ZIGERJA(HR)-3', 'ZIGA IA', 'ZIKA IA',
1035		'ZL(AEIOU)-', 'SL', None,
1036		'ZS(CHT)--', '', '',
1037		'ZS', 'SH', 'Z',
1038		'ZUERST', 'ZUERST', 'ZUERST',
1039		'ZUGRUNDE^$', 'ZU GRUNDE', 'ZU KRUNTE',
1040		'ZUGRUNDE', 'ZU GRUNDE ', 'ZU KRUNTE ',
1041		'ZUGUNSTEN', 'ZU GUNSTN', 'ZU KUNZTN',
1042		'ZUHAUSE-', 'ZU HAUS', 'ZU AUZ',
1043		'ZULASTEN^$', 'ZU LASTN', 'ZU LAZTN',
1044		'ZURUECK^^', 'ZURÜK', 'ZURIK',
1045		'ZURZEIT', 'ZUR ZEIT', 'ZUR ZEIT',
1046		'ZURÜCK^^', 'ZURÜK', 'ZURIK',
1047		'ZUSTANDE', 'ZU STANDE', 'ZU ZTANTE',
1048		'ZUTAGE', 'ZU TAGE', 'ZU TAKE',
1049		'ZUVER^^', 'ZUFA', 'ZUFA',
1050		'ZUVIEL', 'ZU FIL', 'ZU FIL',
1051		'ZUWENIG', 'ZU WENIK', 'ZU FENIK',
1052		'ZY9^', 'ZÜ', None,
1053		'ZYK3$', 'ZIK', None,
1054		'Z(VW)7^', 'SW', None,
1055		None, None, None
1056		# fmt: on
1057		)
1058
# Translation table for ``str.translate``: maps the code point of each
# supported lower-case letter to its upper-case counterpart.  'ß' maps to
# itself.
_upper_trans = {
    ord(lower): upper
    for lower, upper in zip(
        'abcdefghijklmnopqrstuvwxyzàáâãåäæ'
        'çðèéêëìíîïñòóôõöøœšßþùúûüýÿ',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÅÄÆ'
        'ÇÐÈÉÊËÌÍÎÏÑÒÓÔÕÖØŒŠßÞÙÚÛÜÝŸ',
    )
}
1070
def encode(self, word, mode=1, lang='de'):
    """Return the phonet code for a word.

    Args:
        word (str): The word to transform
        mode (int): The phonet variant to employ (1 or 2); it selects
            which of the two replacement columns of the rule table is used
        lang (str): 'de' (default) for German, 'none' for no language

    Returns:
        str: The phonet value

    Examples:
        >>> pe = Phonet()
        >>> pe.encode('Christopher')
        'KRISTOFA'
        >>> pe.encode('Niall')
        'NIAL'
        >>> pe.encode('Smith')
        'SMIT'
        >>> pe.encode('Schmidt')
        'SHMIT'

        >>> pe.encode('Christopher', mode=2)
        'KRIZTUFA'
        >>> pe.encode('Niall', mode=2)
        'NIAL'
        >>> pe.encode('Smith', mode=2)
        'ZNIT'
        >>> pe.encode('Schmidt', mode=2)
        'ZNIT'

        >>> pe.encode('Christopher', lang='none')
        'CHRISTOPHER'
        >>> pe.encode('Niall', lang='none')
        'NIAL'
        >>> pe.encode('Smith', lang='none')
        'SMITH'
        >>> pe.encode('Schmidt', lang='none')
        'SCHMIDT'

    """
    # The rule table is a flat tuple of (pattern, mode-1 replacement,
    # mode-2 replacement) triples, so patterns sit at indices 0, 3, 6, ...
    if lang == 'none':
        rules = self._rules_no_lang
    else:
        rules = self._rules_german

    # Characters with a special meaning inside a rule pattern; they end the
    # literal part of the pattern and must never be compared with the input.
    specials = '(-<^$'

    # Counters are used so that keys that were never seeded read as 0
    # instead of raising KeyError.
    phonet_hash = Counter()  # first char -> index of its first usable rule
    alpha_pos = Counter()  # char -> 1 for umlauts, 2..27 for 'A'..'Z'

    # (first letter, second letter) -> first / last index of matching rules
    phonet_hash_1 = Counter()
    phonet_hash_2 = Counter()

    def _initialize_phonet():
        """Build the lookup tables for the selected rule set."""
        phonet_hash[''] = -1

        # German and international umlauts
        for letter in 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßŒŠŸ':
            alpha_pos[letter] = 1
            phonet_hash[letter] = -1

        # "normal" letters ('A'-'Z')
        for index, letter in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
            alpha_pos[letter] = index + 2
            phonet_hash[letter] = -1

        for first in range(26):
            for second in range(28):
                phonet_hash_1[first, second] = -1
                phonet_hash_2[first, second] = -1

        # for each phonetic rule (only every third entry is a pattern)
        for i in range(0, len(rules), 3):
            rule = rules[i]
            if not rule:
                continue

            # calculate first hash value
            k = rule[0]

            if phonet_hash[k] < 0 and (rules[i + 1] or rules[i + 2]):
                phonet_hash[k] = i

            # calculate second hash values
            if k and alpha_pos[k] >= 2:
                j = alpha_pos[k] - 2
                rule = rule[1:]

                if not rule:
                    rule = ' '
                elif rule[0] == '(':
                    # a letter group: hash every member up to ')'
                    rule = rule[1:]
                else:
                    rule = rule[0]

                while rule and (rule[0] != ')'):
                    k = alpha_pos[rule[0]]

                    if k > 0:
                        # add hash value for this letter
                        if phonet_hash_1[j, k] < 0:
                            phonet_hash_1[j, k] = i
                            phonet_hash_2[j, k] = i

                        if phonet_hash_2[j, k] >= (i - 30):
                            phonet_hash_2[j, k] = i
                        else:
                            k = -1

                    if k <= 0:
                        # add hash value for all letters
                        if phonet_hash_1[j, 0] < 0:
                            phonet_hash_1[j, 0] = i

                        phonet_hash_2[j, 0] = i

                    rule = rule[1:]

    def _phonet(term):
        """Return the phonet coded form of a term.

        Args:
            term (str): Term to transform

        Returns:
            str: The phonet value

        """
        if not term:
            return ''

        char0 = ''
        dest = term
        term_length = len(term)

        # convert input string to upper-case
        src = term.translate(self._upper_trans)

        i = 0  # read position in src
        j = 0  # write position in dest
        zeta = 0  # non-zero right after a '<' rule rewrote src in place

        while i < len(src):
            char = src[i]
            pos = alpha_pos[char]

            if pos >= 2:
                xpos = pos - 2

                if i + 1 == len(src):
                    pos = alpha_pos['']
                else:
                    pos = alpha_pos[src[i + 1]]

                start1 = phonet_hash_1[xpos, pos]
                start2 = phonet_hash_1[xpos, 0]
                end1 = phonet_hash_2[xpos, pos]
                end2 = phonet_hash_2[xpos, 0]

                # preserve rule priorities
                if (start2 >= 0) and ((start1 < 0) or (start2 < start1)):
                    start1, start2 = start2, start1
                    end1, end2 = end2, end1

                if (end1 >= start2) and (start2 >= 0):
                    if end2 > end1:
                        end1 = end2

                    start2 = -1
                    end2 = -1
            else:
                start1 = phonet_hash[char]
                end1 = 10000
                start2 = -1
                end2 = -1

            pos = start1
            zeta0 = 0  # set to 1 when this pass must not advance i

            if pos >= 0:
                # check rules for this char
                while (rules[pos] is None) or (rules[pos][0] == char):
                    if pos > end1:
                        if start2 > 0:
                            pos = start2
                            start1 = start2
                            start2 = -1
                            end1 = end2
                            end2 = -1
                            continue

                        break

                    if (rules[pos] is None) or (rules[pos + mode] is None):
                        # no conversion rule available
                        pos += 3
                        continue

                    # check whole string
                    matches = 1  # number of matching letters
                    priority = 5  # default priority
                    rule = rules[pos][1:]

                    # Literal part of the pattern: stop at the first digit
                    # or meta-character of the pattern.
                    while (
                        rule
                        and (len(src) > (i + matches))
                        and (src[i + matches] == rule[0])
                        and not rule[0].isdigit()
                        and (rule[0] not in specials)
                    ):
                        matches += 1
                        rule = rule[1:]

                    if rule and (rule[0] == '('):
                        # check an array of letters
                        if (
                            (len(src) > (i + matches))
                            and src[i + matches].isalpha()
                            and (src[i + matches] in rule[1:])
                        ):
                            matches += 1

                            while rule and rule[0] != ')':
                                rule = rule[1:]

                            # skip the closing ')'
                            rule = rule[1:]

                    if rule:
                        priority0 = ord(rule[0])
                    else:
                        priority0 = 0

                    matches0 = matches

                    # every '-' shortens the replaced span by one letter
                    while rule and rule[0] == '-' and matches > 1:
                        matches -= 1
                        rule = rule[1:]

                    if rule and rule[0] == '<':
                        rule = rule[1:]

                    if rule and rule[0].isdigit():
                        # read priority
                        priority = int(rule[0])
                        rule = rule[1:]

                    if rule and rule[0:2] == '^^':
                        rule = rule[1:]

                    # Character following the matched span ('' at the end
                    # of the input); a word ends there unless it is a
                    # letter or a '.'.
                    following = src[i + matches0 : i + matches0 + 1]
                    at_word_end = (
                        not following.isalpha() and following != '.'
                    )

                    if (
                        not rule
                        or (
                            (rule[0] == '^')
                            and ((i == 0) or not src[i - 1].isalpha())
                            and ((rule[1:2] != '$') or at_word_end)
                        )
                        or (
                            (rule[0] == '$')
                            and (i > 0)
                            and src[i - 1].isalpha()
                            and at_word_end
                        )
                    ):
                        # look for continuation, if:
                        # matches > 1 and NO '-' in first string
                        pos0 = -1

                        start3 = 0
                        start4 = 0
                        end3 = 0
                        end4 = 0

                        if (
                            (matches > 1)
                            and src[i + matches : i + matches + 1]
                            and (priority0 != ord('-'))
                        ):
                            char0 = src[i + matches - 1]
                            pos0 = alpha_pos[char0]

                            if pos0 >= 2 and src[i + matches]:
                                xpos = pos0 - 2
                                pos0 = alpha_pos[src[i + matches]]
                                start3 = phonet_hash_1[xpos, pos0]
                                start4 = phonet_hash_1[xpos, 0]
                                end3 = phonet_hash_2[xpos, pos0]
                                end4 = phonet_hash_2[xpos, 0]

                                # preserve rule priorities
                                if (start4 >= 0) and (
                                    (start3 < 0) or (start4 < start3)
                                ):
                                    start3, start4 = start4, start3
                                    end3, end4 = end4, end3

                                if (end3 >= start4) and (start4 >= 0):
                                    if end4 > end3:
                                        end3 = end4

                                    start4 = -1
                                    end4 = -1
                            else:
                                start3 = phonet_hash[char0]
                                end3 = 10000
                                start4 = -1
                                end4 = -1

                            pos0 = start3

                        # check continuation rules for src[i+matches]
                        if pos0 >= 0:
                            while (rules[pos0] is None) or (
                                rules[pos0][0] == char0
                            ):
                                if pos0 > end3:
                                    if start4 > 0:
                                        pos0 = start4
                                        start3 = start4
                                        start4 = -1
                                        end3 = end4
                                        end4 = -1
                                        continue

                                    # no continuation rule matched; the
                                    # negative priority makes the check
                                    # after this loop fail
                                    priority0 = -1
                                    break

                                if (rules[pos0] is None) or (
                                    rules[pos0 + mode] is None
                                ):
                                    # no conversion rule available
                                    pos0 += 3
                                    continue

                                # check whole string
                                matches0 = matches
                                priority0 = 5
                                rule = rules[pos0][1:]

                                while (
                                    rule
                                    and (
                                        src[i + matches0 : i + matches0 + 1]
                                        == rule[0]
                                    )
                                    and not rule[0].isdigit()
                                    and (rule[0] not in specials)
                                ):
                                    matches0 += 1
                                    rule = rule[1:]

                                if rule and rule[0] == '(':
                                    # check an array of letters
                                    if src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha() and (
                                        src[i + matches0] in rule[1:]
                                    ):
                                        matches0 += 1

                                        while rule and rule[0] != ')':
                                            rule = rule[1:]

                                        # skip the closing ')'
                                        rule = rule[1:]

                                while rule and rule[0] == '-':
                                    # "matches0" is NOT decremented
                                    # because of
                                    # "if (matches0 == matches)"
                                    rule = rule[1:]

                                if rule and rule[0] == '<':
                                    rule = rule[1:]

                                if rule and rule[0].isdigit():
                                    priority0 = int(rule[0])
                                    rule = rule[1:]

                                following0 = src[
                                    i + matches0 : i + matches0 + 1
                                ]

                                # rule == '^' is not possible here
                                if not rule or (
                                    (rule[0] == '$')
                                    and not following0.isalpha()
                                    and (following0 != '.')
                                ):
                                    if matches0 == matches:
                                        # this is only a partial string
                                        pos0 += 3
                                        continue

                                    if priority0 < priority:
                                        # priority is too low
                                        pos0 += 3
                                        continue

                                    # continuation rule found
                                    break

                                pos0 += 3

                            # end of "while": a continuation rule of at
                            # least equal priority wins, so skip this rule
                            if (priority0 >= priority) and (
                                (rules[pos0] is not None)
                                and (rules[pos0][0] == char0)
                            ):
                                pos += 3
                                continue

                        # replace string
                        if rules[pos] and ('<' in rules[pos][1:]):
                            priority0 = 1
                        else:
                            priority0 = 0

                        rule = rules[pos + mode]

                        if (priority0 == 1) and (zeta == 0):
                            # rule with '<' is applied: the replacement is
                            # written back into src and parsed again
                            if (
                                (j > 0)
                                and rule
                                and (
                                    (dest[j - 1] == char)
                                    or (dest[j - 1] == rule[0])
                                )
                            ):
                                j -= 1

                            zeta0 = 1
                            zeta += 1
                            matches0 = 0

                            # The slice (not an index) stops the overwrite
                            # at the end of src when the replacement is
                            # longer than the remaining input.
                            while (
                                rule
                                and src[i + matches0 : i + matches0 + 1]
                            ):
                                src = (
                                    src[0 : i + matches0]
                                    + rule[0]
                                    + src[i + matches0 + 1 :]
                                )
                                matches0 += 1
                                rule = rule[1:]

                            if matches0 < matches:
                                src = src[0 : i + matches0] + src[i + matches :]

                            # '' when the rule deleted the rest of src
                            char = src[i : i + 1]
                        else:
                            i = i + matches - 1
                            zeta = 0

                            # emit all but the last replacement letter,
                            # never doubling the previous output letter
                            while len(rule) > 1:
                                if (j == 0) or (dest[j - 1] != rule[0]):
                                    dest = (
                                        dest[0:j]
                                        + rule[0]
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                rule = rule[1:]

                            # new "current char"
                            char = rule[0] if rule else ''

                            if rules[pos] and '^^' in rules[pos][1:]:
                                if char:
                                    dest = (
                                        dest[0:j]
                                        + char
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                # restart parsing on the rest of the input
                                src = src[i + 1 :]
                                i = 0
                                zeta0 = 1

                        break

                    pos += 3

                    if pos > end1 and start2 > 0:
                        pos = start2
                        start1 = start2
                        end1 = end2
                        start2 = -1
                        end2 = -1

            if zeta0 == 0:
                if char and ((j == 0) or (dest[j - 1] != char)):
                    # delete multiple letters only
                    dest = dest[0:j] + char + dest[min(j + 1, term_length) :]
                    j += 1

                i += 1
                zeta = 0

        return dest[0:j]

    _initialize_phonet()

    word = unicode_normalize('NFKC', text_type(word))
    return _phonet(word)
1704
1705
def phonet(word, mode=1, lang='de'):
    """Return the phonet code for a word.

    This is a wrapper for :py:meth:`Phonet.encode`: it creates a fresh
    :py:class:`Phonet` instance and forwards all three arguments to it.

    Args:
        word (str): The word to transform
        mode (int): The phonet variant to employ (1 or 2)
        lang (str): 'de' (default) for German, 'none' for no language

    Returns:
        str: The phonet value

    Examples:
        >>> phonet('Christopher')
        'KRISTOFA'
        >>> phonet('Niall')
        'NIAL'
        >>> phonet('Smith')
        'SMIT'
        >>> phonet('Schmidt')
        'SHMIT'

        >>> phonet('Christopher', mode=2)
        'KRIZTUFA'
        >>> phonet('Niall', mode=2)
        'NIAL'
        >>> phonet('Smith', mode=2)
        'ZNIT'
        >>> phonet('Schmidt', mode=2)
        'ZNIT'

        >>> phonet('Christopher', lang='none')
        'CHRISTOPHER'
        >>> phonet('Niall', lang='none')
        'NIAL'
        >>> phonet('Smith', lang='none')
        'SMITH'
        >>> phonet('Schmidt', lang='none')
        'SCHMIDT'

    """
    encoder = Phonet()
    return encoder.encode(word, mode, lang)
1749
1750
# Running the module as a script executes the doctests in its docstrings.
if __name__ == '__main__':
    from doctest import testmod

    testmod()
1755

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._phonet.Phonet.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like