abydos.phonetic._Phonet.Phonet.encode() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.phonetic._Phonet.Phonet.encode() F

↳ Parent: abydos.phonetic._Phonet

Complexity

Conditions

142

Size

Total Lines	633
Code Lines	381

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	269
CRAP Score	142

Importance

Changes

Metric	Value
cc	142
eloc	381
nop	4
dl	0
loc	633
ccs	269
cts	269
cp	1
crap	142
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

1		# -*- coding: utf-8 -*-
		0 ignored issues – show coding-style introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report Too many lines in module (1758/1000) Loading history... Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_Phonet` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._Phonet.
20
21		phonet algorithm (a.k.a. Hannoveraner Phonetik), intended chiefly for German
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from collections import Counter
32	1	from unicodedata import normalize as unicode_normalize
33
34	1	from six import text_type
35	1	from six.moves import range
36
37	1	from ._Phonetic import Phonetic
38
39	1	__all__ = ['Phonet', 'phonet']
40
41
42	1	class Phonet(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
43		"""Phonet code.
44
45		phonet ("Hannoveraner Phonetik") was developed by Jörg Michael and
46		documented in :cite:`Michael:1999`.
47
48		This is a port of Jesper Zedlitz's code, which is licensed LGPL
49		:cite:`Zedlitz:2015`.
50
51		That is, in turn, based on Michael's C code, which is also licensed LGPL
52		:cite:`Michael:2007`.
53		"""
54
55	1	_rules_no_lang = ( # separator chars
56		# fmt: off
57		'´', ' ', ' ',
58		'"', ' ', ' ',
59		'`$', '', '',
60		'\'', ' ', ' ',
61		',', ',', ',',
62		';', ',', ',',
63		'-', ' ', ' ',
64		' ', ' ', ' ',
65		'.', '.', '.',
66		':', '.', '.',
67		# German umlauts
68		'Ä', 'AE', 'AE',
69		'Ö', 'OE', 'OE',
70		'Ü', 'UE', 'UE',
71		'ß', 'S', 'S',
72		# international umlauts
73		'À', 'A', 'A',
74		'Á', 'A', 'A',
75		'Â', 'A', 'A',
76		'Ã', 'A', 'A',
77		'Å', 'A', 'A',
78		'Æ', 'AE', 'AE',
79		'Ç', 'C', 'C',
80		'Ð', 'DJ', 'DJ',
81		'È', 'E', 'E',
82		'É', 'E', 'E',
83		'Ê', 'E', 'E',
84		'Ë', 'E', 'E',
85		'Ì', 'I', 'I',
86		'Í', 'I', 'I',
87		'Î', 'I', 'I',
88		'Ï', 'I', 'I',
89		'Ñ', 'NH', 'NH',
90		'Ò', 'O', 'O',
91		'Ó', 'O', 'O',
92		'Ô', 'O', 'O',
93		'Õ', 'O', 'O',
94		'Œ', 'OE', 'OE',
95		'Ø', 'OE', 'OE',
96		'Š', 'SH', 'SH',
97		'Þ', 'TH', 'TH',
98		'Ù', 'U', 'U',
99		'Ú', 'U', 'U',
100		'Û', 'U', 'U',
101		'Ý', 'Y', 'Y',
102		'Ÿ', 'Y', 'Y',
103		# 'normal' letters (A-Z)
104		'MC^', 'MAC', 'MAC',
105		'MC^', 'MAC', 'MAC',
106		'M´^', 'MAC', 'MAC',
107		'M\'^', 'MAC', 'MAC',
108		'O´^', 'O', 'O',
109		'O\'^', 'O', 'O',
110		'VAN DEN ^', 'VANDEN', 'VANDEN',
111		None, None, None
112		# fmt: on
113		)
114
115	1	_rules_german = ( # separator chars
116		# fmt: off
117		'´', ' ', ' ',
118		'"', ' ', ' ',
119		'`$', '', '',
120		'\'', ' ', ' ',
121		',', ' ', ' ',
122		';', ' ', ' ',
123		'-', ' ', ' ',
124		' ', ' ', ' ',
125		'.', '.', '.',
126		':', '.', '.',
127		# German umlauts
128		'ÄE', 'E', 'E',
129		'ÄU<', 'EU', 'EU',
130		'ÄV(AEOU)-<', 'EW', None,
131		'Ä$', 'Ä', None,
132		'Ä<', None, 'E',
133		'Ä', 'E', None,
134		'ÖE', 'Ö', 'Ö',
135		'ÖU', 'Ö', 'Ö',
136		'ÖVER--<', 'ÖW', None,
137		'ÖV(AOU)-', 'ÖW', None,
138		'ÜBEL(GNRW)-^^', 'ÜBL ', 'IBL ',
139		'ÜBER^^', 'ÜBA', 'IBA',
140		'ÜE', 'Ü', 'I',
141		'ÜVER--<', 'ÜW', None,
142		'ÜV(AOU)-', 'ÜW', None,
143		'Ü', None, 'I',
144		'ßCH<', None, 'Z',
145		'ß<', 'S', 'Z',
146		# international umlauts
147		'À<', 'A', 'A',
148		'Á<', 'A', 'A',
149		'Â<', 'A', 'A',
150		'Ã<', 'A', 'A',
151		'Å<', 'A', 'A',
152		'ÆER-', 'E', 'E',
153		'ÆU<', 'EU', 'EU',
154		'ÆV(AEOU)-<', 'EW', None,
155		'Æ$', 'Ä', None,
156		'Æ<', None, 'E',
157		'Æ', 'E', None,
158		'Ç', 'Z', 'Z',
159		'ÐÐ-', '', '',
160		'Ð', 'DI', 'TI',
161		'È<', 'E', 'E',
162		'É<', 'E', 'E',
163		'Ê<', 'E', 'E',
164		'Ë', 'E', 'E',
165		'Ì<', 'I', 'I',
166		'Í<', 'I', 'I',
167		'Î<', 'I', 'I',
168		'Ï', 'I', 'I',
169		'ÑÑ-', '', '',
170		'Ñ', 'NI', 'NI',
171		'Ò<', 'O', 'U',
172		'Ó<', 'O', 'U',
173		'Ô<', 'O', 'U',
174		'Õ<', 'O', 'U',
175		'Œ<', 'Ö', 'Ö',
176		'Ø(IJY)-<', 'E', 'E',
177		'Ø<', 'Ö', 'Ö',
178		'Š', 'SH', 'Z',
179		'Þ', 'T', 'T',
180		'Ù<', 'U', 'U',
181		'Ú<', 'U', 'U',
182		'Û<', 'U', 'U',
183		'Ý<', 'I', 'I',
184		'Ÿ<', 'I', 'I',
185		# 'normal' letters (A-Z)
186		'ABELLE$', 'ABL', 'ABL',
187		'ABELL$', 'ABL', 'ABL',
188		'ABIENNE$', 'ABIN', 'ABIN',
189		'ACHME---^', 'ACH', 'AK',
190		'ACEY$', 'AZI', 'AZI',
191		'ADV', 'ATW', None,
192		'AEGL-', 'EK', None,
193		'AEU<', 'EU', 'EU',
194		'AE2', 'E', 'E',
195		'AFTRAUBEN------', 'AFT ', 'AFT ',
196		'AGL-1', 'AK', None,
197		'AGNI-^', 'AKN', 'AKN',
198		'AGNIE-', 'ANI', 'ANI',
199		'AGN(AEOU)-$', 'ANI', 'ANI',
200		'AH(AIOÖUÜY)-', 'AH', None,
201		'AIA2', 'AIA', 'AIA',
202		'AIE$', 'E', 'E',
203		'AILL(EOU)-', 'ALI', 'ALI',
204		'AINE$', 'EN', 'EN',
205		'AIRE$', 'ER', 'ER',
206		'AIR-', 'E', 'E',
207		'AISE$', 'ES', 'EZ',
208		'AISSANCE$', 'ESANS', 'EZANZ',
209		'AISSE$', 'ES', 'EZ',
210		'AIX$', 'EX', 'EX',
211		'AJ(AÄEÈÉÊIOÖUÜ)--', 'A', 'A',
212		'AKTIE', 'AXIE', 'AXIE',
213		'AKTUEL', 'AKTUEL', None,
214		'ALOI^', 'ALOI', 'ALUI', # Don't merge these rules
215		'ALOY^', 'ALOI', 'ALUI', # needed by 'check_rules'
216		'AMATEU(RS)-', 'AMATÖ', 'ANATÖ',
217		'ANCH(OEI)-', 'ANSH', 'ANZ',
218		'ANDERGEGANG----', 'ANDA GE', 'ANTA KE',
219		'ANDERGEHE----', 'ANDA ', 'ANTA ',
220		'ANDERGESETZ----', 'ANDA GE', 'ANTA KE',
221		'ANDERGING----', 'ANDA ', 'ANTA ',
222		'ANDERSETZ(ET)-----', 'ANDA ', 'ANTA ',
223		'ANDERZUGEHE----', 'ANDA ZU ', 'ANTA ZU ',
224		'ANDERZUSETZE-----', 'ANDA ZU ', 'ANTA ZU ',
225		'ANER(BKO)---^^', 'AN', None,
226		'ANHAND---^$', 'AN H', 'AN ',
227		'ANH(AÄEIOÖUÜY)--^^', 'AN', None,
228		'ANIELLE$', 'ANIEL', 'ANIL',
229		'ANIEL', 'ANIEL', None,
230		'ANSTELLE----^$', 'AN ST', 'AN ZT',
231		'ANTI^^', 'ANTI', 'ANTI',
232		'ANVER^^', 'ANFA', 'ANFA',
233		'ATIA$', 'ATIA', 'ATIA',
234		'ATIA(NS)--', 'ATI', 'ATI',
235		'ATI(AÄOÖUÜ)-', 'AZI', 'AZI',
236		'AUAU--', '', '',
237		'AUERE$', 'AUERE', None,
238		'AUERE(NS)-$', 'AUERE', None,
239		'AUERE(AIOUY)--', 'AUER', None,
240		'AUER(AÄIOÖUÜY)-', 'AUER', None,
241		'AUER<', 'AUA', 'AUA',
242		'AUF^^', 'AUF', 'AUF',
243		'AULT$', 'O', 'U',
244		'AUR(BCDFGKLMNQSTVWZ)-', 'AUA', 'AUA',
245		'AUR$', 'AUA', 'AUA',
246		'AUSSE$', 'OS', 'UZ',
247		'AUS(ST)-^', 'AUS', 'AUS',
248		'AUS^^', 'AUS', 'AUS',
249		'AUTOFAHR----', 'AUTO ', 'AUTU ',
250		'AUTO^^', 'AUTO', 'AUTU',
251		'AUX(IY)-', 'AUX', 'AUX',
252		'AUX', 'O', 'U',
253		'AU', 'AU', 'AU',
254		'AVER--<', 'AW', None,
255		'AVIER$', 'AWIE', 'AFIE',
256		'AV(EÈÉÊI)-^', 'AW', None,
257		'AV(AOU)-', 'AW', None,
258		'AYRE$', 'EIRE', 'EIRE',
259		'AYRE(NS)-$', 'EIRE', 'EIRE',
260		'AYRE(AIOUY)--', 'EIR', 'EIR',
261		'AYR(AÄIOÖUÜY)-', 'EIR', 'EIR',
262		'AYR<', 'EIA', 'EIA',
263		'AYER--<', 'EI', 'EI',
264		'AY(AÄEIOÖUÜY)--', 'A', 'A',
265		'AË', 'E', 'E',
266		'A(IJY)<', 'EI', 'EI',
267		'BABY^$', 'BEBI', 'BEBI',
268		'BAB(IY)^', 'BEBI', 'BEBI',
269		'BEAU^$', 'BO', None,
270		'BEA(BCMNRU)-^', 'BEA', 'BEA',
271		'BEAT(AEIMORU)-^', 'BEAT', 'BEAT',
272		'BEE$', 'BI', 'BI',
273		'BEIGE^$', 'BESH', 'BEZ',
274		'BENOIT--', 'BENO', 'BENU',
275		'BER(DT)-', 'BER', None,
276		'BERN(DT)-', 'BERN', None,
277		'BE(LMNRST)-^', 'BE', 'BE',
278		'BETTE$', 'BET', 'BET',
279		'BEVOR^$', 'BEFOR', None,
280		'BIC$', 'BIZ', 'BIZ',
281		'BOWL(EI)-', 'BOL', 'BUL',
282		'BP(AÄEÈÉÊIÌÍÎOÖRUÜY)-', 'B', 'B',
283		'BRINGEND-----^', 'BRI', 'BRI',
284		'BRINGEND-----', ' BRI', ' BRI',
285		'BROW(NS)-', 'BRAU', 'BRAU',
286		'BUDGET7', 'BÜGE', 'BIKE',
287		'BUFFET7', 'BÜFE', 'BIFE',
288		'BYLLE$', 'BILE', 'BILE',
289		'BYLL$', 'BIL', 'BIL',
290		'BYPA--^', 'BEI', 'BEI',
291		'BYTE<', 'BEIT', 'BEIT',
292		'BY9^', 'BÜ', None,
293		'B(SßZ)$', 'BS', None,
294		'CACH(EI)-^', 'KESH', 'KEZ',
295		'CAE--', 'Z', 'Z',
296		'CA(IY)$', 'ZEI', 'ZEI',
297		'CE(EIJUY)--', 'Z', 'Z',
298		'CENT<', 'ZENT', 'ZENT',
299		'CERST(EI)----^', 'KE', 'KE',
300		'CER$', 'ZA', 'ZA',
301		'CE3', 'ZE', 'ZE',
302		'CH\'S$', 'X', 'X',
303		'CH´S$', 'X', 'X',
304		'CHAO(ST)-', 'KAO', 'KAU',
305		'CHAMPIO-^', 'SHEMPI', 'ZENBI',
306		'CHAR(AI)-^', 'KAR', 'KAR',
307		'CHAU(CDFSVWXZ)-', 'SHO', 'ZU',
308		'CHÄ(CF)-', 'SHE', 'ZE',
309		'CHE(CF)-', 'SHE', 'ZE',
310		'CHEM-^', 'KE', 'KE', # or: 'CHE', 'KE'
311		'CHEQUE<', 'SHEK', 'ZEK',
312		'CHI(CFGPVW)-', 'SHI', 'ZI',
313		'CH(AEUY)-<^', 'SH', 'Z',
314		'CHK-', '', '',
315		'CHO(CKPS)-^', 'SHO', 'ZU',
316		'CHRIS-', 'KRI', None,
317		'CHRO-', 'KR', None,
318		'CH(LOR)-<^', 'K', 'K',
319		'CHST-', 'X', 'X',
320		'CH(SßXZ)3', 'X', 'X',
321		'CHTNI-3', 'CHN', 'KN',
322		'CH^', 'K', 'K', # or: 'CH', 'K'
323		'CH', 'CH', 'K',
324		'CIC$', 'ZIZ', 'ZIZ',
325		'CIENCEFICT----', 'EIENS ', 'EIENZ ',
326		'CIENCE$', 'EIENS', 'EIENZ',
327		'CIER$', 'ZIE', 'ZIE',
328		'CYB-^', 'ZEI', 'ZEI',
329		'CY9^', 'ZÜ', 'ZI',
330		'C(IJY)-<3', 'Z', 'Z',
331		'CLOWN-', 'KLAU', 'KLAU',
332		'CCH', 'Z', 'Z',
333		'CCE-', 'X', 'X',
334		'C(CK)-', '', '',
335		'CLAUDET---', 'KLO', 'KLU',
336		'CLAUDINE^$', 'KLODIN', 'KLUTIN',
337		'COACH', 'KOSH', 'KUZ',
338		'COLE$', 'KOL', 'KUL',
339		'COUCH', 'KAUSH', 'KAUZ',
340		'COW', 'KAU', 'KAU',
341		'CQUES$', 'K', 'K',
342		'CQUE', 'K', 'K',
343		'CRASH--9', 'KRE', 'KRE',
344		'CREAT-^', 'KREA', 'KREA',
345		'CST', 'XT', 'XT',
346		'CS<^', 'Z', 'Z',
347		'C(SßX)', 'X', 'X',
348		'CT\'S$', 'X', 'X',
349		'CT(SßXZ)', 'X', 'X',
350		'CZ<', 'Z', 'Z',
351		'C(ÈÉÊÌÍÎÝ)3', 'Z', 'Z',
352		'C.^', 'C.', 'C.',
353		'CÄ-', 'Z', 'Z',
354		'CÜ$', 'ZÜ', 'ZI',
355		'C\'S$', 'X', 'X',
356		'C<', 'K', 'K',
357		'DAHER^$', 'DAHER', None,
358		'DARAUFFOLGE-----', 'DARAUF ', 'TARAUF ',
359		'DAVO(NR)-^$', 'DAFO', 'TAFU',
360		'DD(SZ)--<', '', '',
361		'DD9', 'D', None,
362		'DEPOT7', 'DEPO', 'TEBU',
363		'DESIGN', 'DISEIN', 'TIZEIN',
364		'DE(LMNRST)-3^', 'DE', 'TE',
365		'DETTE$', 'DET', 'TET',
366		'DH$', 'T', None,
367		'DIC$', 'DIZ', 'TIZ',
368		'DIDR-^', 'DIT', None,
369		'DIEDR-^', 'DIT', None,
370		'DJ(AEIOU)-^', 'I', 'I',
371		'DMITR-^', 'DIMIT', 'TINIT',
372		'DRY9^', 'DRÜ', None,
373		'DT-', '', '',
374		'DUIS-^', 'DÜ', 'TI',
375		'DURCH^^', 'DURCH', 'TURK',
376		'DVA$', 'TWA', None,
377		'DY9^', 'DÜ', None,
378		'DYS$', 'DIS', None,
379		'DS(CH)--<', 'T', 'T',
380		'DST', 'ZT', 'ZT',
381		'DZS(CH)--', 'T', 'T',
382		'D(SßZ)', 'Z', 'Z',
383		'D(AÄEIOÖRUÜY)-', 'D', None,
384		'D(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'D', None,
385		'D\'H^', 'D', 'T',
386		'D´H^', 'D', 'T',
387		'D`H^', 'D', 'T',
388		'D\'S3$', 'Z', 'Z',
389		'D´S3$', 'Z', 'Z',
390		'D^', 'D', None,
391		'D', 'T', 'T',
392		'EAULT$', 'O', 'U',
393		'EAUX$', 'O', 'U',
394		'EAU', 'O', 'U',
395		'EAV', 'IW', 'IF',
396		'EAS3$', 'EAS', None,
397		'EA(AÄEIOÖÜY)-3', 'EA', 'EA',
398		'EA3$', 'EA', 'EA',
399		'EA3', 'I', 'I',
400		'EBENSO^$', 'EBNSO', 'EBNZU',
401		'EBENSO^^', 'EBNSO ', 'EBNZU ',
402		'EBEN^^', 'EBN', 'EBN',
403		'EE9', 'E', 'E',
404		'EGL-1', 'EK', None,
405		'EHE(IUY)--1', 'EH', None,
406		'EHUNG---1', 'E', None,
407		'EH(AÄIOÖUÜY)-1', 'EH', None,
408		'EIEI--', '', '',
409		'EIERE^$', 'EIERE', None,
410		'EIERE$', 'EIERE', None,
411		'EIERE(NS)-$', 'EIERE', None,
412		'EIERE(AIOUY)--', 'EIER', None,
413		'EIER(AÄIOÖUÜY)-', 'EIER', None,
414		'EIER<', 'EIA', None,
415		'EIGL-1', 'EIK', None,
416		'EIGH$', 'EI', 'EI',
417		'EIH--', 'E', 'E',
418		'EILLE$', 'EI', 'EI',
419		'EIR(BCDFGKLMNQSTVWZ)-', 'EIA', 'EIA',
420		'EIR$', 'EIA', 'EIA',
421		'EITRAUBEN------', 'EIT ', 'EIT ',
422		'EI', 'EI', 'EI',
423		'EJ$', 'EI', 'EI',
424		'ELIZ^', 'ELIS', None,
425		'ELZ^', 'ELS', None,
426		'EL-^', 'E', 'E',
427		'ELANG----1', 'E', 'E',
428		'EL(DKL)--1', 'E', 'E',
429		'EL(MNT)--1$', 'E', 'E',
430		'ELYNE$', 'ELINE', 'ELINE',
431		'ELYN$', 'ELIN', 'ELIN',
432		'EL(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'EL', 'EL',
433		'EL-1', 'L', 'L',
434		'EM-^', None, 'E',
435		'EM(DFKMPQT)--1', None, 'E',
436		'EM(AÄEÈÉÊIÌÍÎOÖUÜY)--1', None, 'E',
437		'EM-1', None, 'N',
438		'ENGAG-^', 'ANGA', 'ANKA',
439		'EN-^', 'E', 'E',
440		'ENTUEL', 'ENTUEL', None,
441		'EN(CDGKQSTZ)--1', 'E', 'E',
442		'EN(AÄEÈÉÊIÌÍÎNOÖUÜY)-1', 'EN', 'EN',
443		'EN-1', '', '',
444		'ERH(AÄEIOÖUÜ)-^', 'ERH', 'ER',
445		'ER-^', 'E', 'E',
446		'ERREGEND-----', ' ER', ' ER',
447		'ERT1$', 'AT', None,
448		'ER(DGLKMNRQTZß)-1', 'ER', None,
449		'ER(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'ER', 'A',
450		'ER1$', 'A', 'A',
451		'ER<1', 'A', 'A',
452		'ETAT7', 'ETA', 'ETA',
453		'ETI(AÄOÖÜU)-', 'EZI', 'EZI',
454		'EUERE$', 'EUERE', None,
455		'EUERE(NS)-$', 'EUERE', None,
456		'EUERE(AIOUY)--', 'EUER', None,
457		'EUER(AÄIOÖUÜY)-', 'EUER', None,
458		'EUER<', 'EUA', None,
459		'EUEU--', '', '',
460		'EUILLE$', 'Ö', 'Ö',
461		'EUR$', 'ÖR', 'ÖR',
462		'EUX', 'Ö', 'Ö',
463		'EUSZ$', 'EUS', None,
464		'EUTZ$', 'EUS', None,
465		'EUYS$', 'EUS', 'EUZ',
466		'EUZ$', 'EUS', None,
467		'EU', 'EU', 'EU',
468		'EVER--<1', 'EW', None,
469		'EV(ÄOÖUÜ)-1', 'EW', None,
470		'EYER<', 'EIA', 'EIA',
471		'EY<', 'EI', 'EI',
472		'FACETTE', 'FASET', 'FAZET',
473		'FANS--^$', 'FE', 'FE',
474		'FAN-^$', 'FE', 'FE',
475		'FAULT-', 'FOL', 'FUL',
476		'FEE(DL)-', 'FI', 'FI',
477		'FEHLER', 'FELA', 'FELA',
478		'FE(LMNRST)-3^', 'FE', 'FE',
479		'FOERDERN---^', 'FÖRD', 'FÖRT',
480		'FOERDERN---', ' FÖRD', ' FÖRT',
481		'FOND7', 'FON', 'FUN',
482		'FRAIN$', 'FRA', 'FRA',
483		'FRISEU(RS)-', 'FRISÖ', 'FRIZÖ',
484		'FY9^', 'FÜ', None,
485		'FÖRDERN---^', 'FÖRD', 'FÖRT',
486		'FÖRDERN---', ' FÖRD', ' FÖRT',
487		'GAGS^$', 'GEX', 'KEX',
488		'GAG^$', 'GEK', 'KEK',
489		'GD', 'KT', 'KT',
490		'GEGEN^^', 'GEGN', 'KEKN',
491		'GEGENGEKOM-----', 'GEGN ', 'KEKN ',
492		'GEGENGESET-----', 'GEGN ', 'KEKN ',
493		'GEGENKOMME-----', 'GEGN ', 'KEKN ',
494		'GEGENZUKOM---', 'GEGN ZU ', 'KEKN ZU ',
495		'GENDETWAS-----$', 'GENT ', 'KENT ',
496		'GENRE', 'IORE', 'IURE',
497		'GE(LMNRST)-3^', 'GE', 'KE',
498		'GER(DKT)-', 'GER', None,
499		'GETTE$', 'GET', 'KET',
500		'GGF.', 'GF.', None,
501		'GG-', '', '',
502		'GH', 'G', None,
503		'GI(AOU)-^', 'I', 'I',
504		'GION-3', 'KIO', 'KIU',
505		'G(CK)-', '', '',
506		'GJ(AEIOU)-^', 'I', 'I',
507		'GMBH^$', 'GMBH', 'GMBH',
508		'GNAC$', 'NIAK', 'NIAK',
509		'GNON$', 'NION', 'NIUN',
510		'GN$', 'N', 'N',
511		'GONCAL-^', 'GONZA', 'KUNZA',
512		'GRY9^', 'GRÜ', None,
513		'G(SßXZ)-<', 'K', 'K',
514		'GUCK-', 'KU', 'KU',
515		'GUISEP-^', 'IUSE', 'IUZE',
516		'GUI-^', 'G', 'K',
517		'GUTAUSSEH------^', 'GUT ', 'KUT ',
518		'GUTGEHEND------^', 'GUT ', 'KUT ',
519		'GY9^', 'GÜ', None,
520		'G(AÄEILOÖRUÜY)-', 'G', None,
521		'G(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'G', None,
522		'G\'S$', 'X', 'X',
523		'G´S$', 'X', 'X',
524		'G^', 'G', None,
525		'G', 'K', 'K',
526		'HA(HIUY)--1', 'H', None,
527		'HANDVOL---^', 'HANT ', 'ANT ',
528		'HANNOVE-^', 'HANOF', None,
529		'HAVEN7$', 'HAFN', None,
530		'HEAD-', 'HE', 'E',
531		'HELIEGEN------', 'E ', 'E ',
532		'HESTEHEN------', 'E ', 'E ',
533		'HE(LMNRST)-3^', 'HE', 'E',
534		'HE(LMN)-1', 'E', 'E',
535		'HEUR1$', 'ÖR', 'ÖR',
536		'HE(HIUY)--1', 'H', None,
537		'HIH(AÄEIOÖUÜY)-1', 'IH', None,
538		'HLH(AÄEIOÖUÜY)-1', 'LH', None,
539		'HMH(AÄEIOÖUÜY)-1', 'MH', None,
540		'HNH(AÄEIOÖUÜY)-1', 'NH', None,
541		'HOBBY9^', 'HOBI', None,
542		'HOCHBEGAB-----^', 'HOCH ', 'UK ',
543		'HOCHTALEN-----^', 'HOCH ', 'UK ',
544		'HOCHZUFRI-----^', 'HOCH ', 'UK ',
545		'HO(HIY)--1', 'H', None,
546		'HRH(AÄEIOÖUÜY)-1', 'RH', None,
547		'HUH(AÄEIOÖUÜY)-1', 'UH', None,
548		'HUIS^^', 'HÜS', 'IZ',
549		'HUIS$', 'ÜS', 'IZ',
550		'HUI--1', 'H', None,
551		'HYGIEN^', 'HÜKIEN', None,
552		'HY9^', 'HÜ', None,
553		'HY(BDGMNPST)-', 'Ü', None,
554		'H.^', None, 'H.',
555		'HÄU--1', 'H', None,
556		'H^', 'H', '',
557		'H', '', '',
558		'ICHELL---', 'ISH', 'IZ',
559		'ICHI$', 'ISHI', 'IZI',
560		'IEC$', 'IZ', 'IZ',
561		'IEDENSTELLE------', 'IDN ', 'ITN ',
562		'IEI-3', '', '',
563		'IELL3', 'IEL', 'IEL',
564		'IENNE$', 'IN', 'IN',
565		'IERRE$', 'IER', 'IER',
566		'IERZULAN---', 'IR ZU ', 'IR ZU ',
567		'IETTE$', 'IT', 'IT',
568		'IEU', 'IÖ', 'IÖ',
569		'IE<4', 'I', 'I',
570		'IGL-1', 'IK', None,
571		'IGHT3$', 'EIT', 'EIT',
572		'IGNI(EO)-', 'INI', 'INI',
573		'IGN(AEOU)-$', 'INI', 'INI',
574		'IHER(DGLKRT)--1', 'IHE', None,
575		'IHE(IUY)--', 'IH', None,
576		'IH(AIOÖUÜY)-', 'IH', None,
577		'IJ(AOU)-', 'I', 'I',
578		'IJ$', 'I', 'I',
579		'IJ<', 'EI', 'EI',
580		'IKOLE$', 'IKOL', 'IKUL',
581		'ILLAN(STZ)--4', 'ILIA', 'ILIA',
582		'ILLAR(DT)--4', 'ILIA', 'ILIA',
583		'IMSTAN----^', 'IM ', 'IN ',
584		'INDELERREGE------', 'INDL ', 'INTL ',
585		'INFRAGE-----^$', 'IN ', 'IN ',
586		'INTERN(AOU)-^', 'INTAN', 'INTAN',
587		'INVER-', 'INWE', 'INFE',
588		'ITI(AÄIOÖUÜ)-', 'IZI', 'IZI',
589		'IUSZ$', 'IUS', None,
590		'IUTZ$', 'IUS', None,
591		'IUZ$', 'IUS', None,
592		'IVER--<', 'IW', None,
593		'IVIER$', 'IWIE', 'IFIE',
594		'IV(ÄOÖUÜ)-', 'IW', None,
595		'IV<3', 'IW', None,
596		'IY2', 'I', None,
597		'I(ÈÉÊ)<4', 'I', 'I',
598		'JAVIE---<^', 'ZA', 'ZA',
599		'JEANS^$', 'JINS', 'INZ',
600		'JEANNE^$', 'IAN', 'IAN',
601		'JEAN-^', 'IA', 'IA',
602		'JER-^', 'IE', 'IE',
603		'JE(LMNST)-', 'IE', 'IE',
604		'JI^', 'JI', None,
605		'JOR(GK)^$', 'IÖRK', 'IÖRK',
606		'J', 'I', 'I',
607		'KC(ÄEIJ)-', 'X', 'X',
608		'KD', 'KT', None,
609		'KE(LMNRST)-3^', 'KE', 'KE',
610		'KG(AÄEILOÖRUÜY)-', 'K', None,
611		'KH<^', 'K', 'K',
612		'KIC$', 'KIZ', 'KIZ',
613		'KLE(LMNRST)-3^', 'KLE', 'KLE',
614		'KOTELE-^', 'KOTL', 'KUTL',
615		'KREAT-^', 'KREA', 'KREA',
616		'KRÜS(TZ)--^', 'KRI', None,
617		'KRYS(TZ)--^', 'KRI', None,
618		'KRY9^', 'KRÜ', None,
619		'KSCH---', 'K', 'K',
620		'KSH--', 'K', 'K',
621		'K(SßXZ)7', 'X', 'X', # implies 'KST' -> 'XT'
622		'KT\'S$', 'X', 'X',
623		'KTI(AIOU)-3', 'XI', 'XI',
624		'KT(SßXZ)', 'X', 'X',
625		'KY9^', 'KÜ', None,
626		'K\'S$', 'X', 'X',
627		'K´S$', 'X', 'X',
628		'LANGES$', ' LANGES', ' LANKEZ',
629		'LANGE$', ' LANGE', ' LANKE',
630		'LANG$', ' LANK', ' LANK',
631		'LARVE-', 'LARF', 'LARF',
632		'LD(SßZ)$', 'LS', 'LZ',
633		'LD\'S$', 'LS', 'LZ',
634		'LD´S$', 'LS', 'LZ',
635		'LEAND-^', 'LEAN', 'LEAN',
636		'LEERSTEHE-----^', 'LER ', 'LER ',
637		'LEICHBLEIB-----', 'LEICH ', 'LEIK ',
638		'LEICHLAUTE-----', 'LEICH ', 'LEIK ',
639		'LEIDERREGE------', 'LEIT ', 'LEIT ',
640		'LEIDGEPR----^', 'LEIT ', 'LEIT ',
641		'LEINSTEHE-----', 'LEIN ', 'LEIN ',
642		'LEL-', 'LE', 'LE',
643		'LE(MNRST)-3^', 'LE', 'LE',
644		'LETTE$', 'LET', 'LET',
645		'LFGNAG-', 'LFGAN', 'LFKAN',
646		'LICHERWEIS----', 'LICHA ', 'LIKA ',
647		'LIC$', 'LIZ', 'LIZ',
648		'LIVE^$', 'LEIF', 'LEIF',
649		'LT(SßZ)$', 'LS', 'LZ',
650		'LT\'S$', 'LS', 'LZ',
651		'LT´S$', 'LS', 'LZ',
652		'LUI(GS)--', 'LU', 'LU',
653		'LV(AIO)-', 'LW', None,
654		'LY9^', 'LÜ', None,
655		'LSTS$', 'LS', 'LZ',
656		'LZ(BDFGKLMNPQRSTVWX)-', 'LS', None,
657		'L(SßZ)$', 'LS', None,
658		'MAIR-<', 'MEI', 'NEI',
659		'MANAG-', 'MENE', 'NENE',
660		'MANUEL', 'MANUEL', None,
661		'MASSEU(RS)-', 'MASÖ', 'NAZÖ',
662		'MATCH', 'MESH', 'NEZ',
663		'MAURICE', 'MORIS', 'NURIZ',
664		'MBH^$', 'MBH', 'MBH',
665		'MB(ßZ)$', 'MS', None,
666		'MB(SßTZ)-', 'M', 'N',
667		'MCG9^', 'MAK', 'NAK',
668		'MC9^', 'MAK', 'NAK',
669		'MEMOIR-^', 'MEMOA', 'NENUA',
670		'MERHAVEN$', 'MAHAFN', None,
671		'ME(LMNRST)-3^', 'ME', 'NE',
672		'MEN(STZ)--3', 'ME', None,
673		'MEN$', 'MEN', None,
674		'MIGUEL-', 'MIGE', 'NIKE',
675		'MIKE^$', 'MEIK', 'NEIK',
676		'MITHILFE----^$', 'MIT H', 'NIT ',
677		'MN$', 'M', None,
678		'MN', 'N', 'N',
679		'MPJUTE-', 'MPUT', 'NBUT',
680		'MP(ßZ)$', 'MS', None,
681		'MP(SßTZ)-', 'M', 'N',
682		'MP(BDJLMNPQVW)-', 'MB', 'NB',
683		'MY9^', 'MÜ', None,
684		'M(ßZ)$', 'MS', None,
685		'M´G7^', 'MAK', 'NAK',
686		'M\'G7^', 'MAK', 'NAK',
687		'M´^', 'MAK', 'NAK',
688		'M\'^', 'MAK', 'NAK',
689		'M', None, 'N',
690		'NACH^^', 'NACH', 'NAK',
691		'NADINE', 'NADIN', 'NATIN',
692		'NAIV--', 'NA', 'NA',
693		'NAISE$', 'NESE', 'NEZE',
694		'NAUGENOMM------', 'NAU ', 'NAU ',
695		'NAUSOGUT$', 'NAUSO GUT', 'NAUZU KUT',
696		'NCH$', 'NSH', 'NZ',
697		'NCOISE$', 'SOA', 'ZUA',
698		'NCOIS$', 'SOA', 'ZUA',
699		'NDAR$', 'NDA', 'NTA',
700		'NDERINGEN------', 'NDE ', 'NTE ',
701		'NDRO(CDKTZ)-', 'NTRO', None,
702		'ND(BFGJLMNPQVW)-', 'NT', None,
703		'ND(SßZ)$', 'NS', 'NZ',
704		'ND\'S$', 'NS', 'NZ',
705		'ND´S$', 'NS', 'NZ',
706		'NEBEN^^', 'NEBN', 'NEBN',
707		'NENGELERN------', 'NEN ', 'NEN ',
708		'NENLERN(ET)---', 'NEN LE', 'NEN LE',
709		'NENZULERNE---', 'NEN ZU LE', 'NEN ZU LE',
710		'NE(LMNRST)-3^', 'NE', 'NE',
711		'NEN-3', 'NE', 'NE',
712		'NETTE$', 'NET', 'NET',
713		'NGU^^', 'NU', 'NU',
714		'NG(BDFJLMNPQRTVW)-', 'NK', 'NK',
715		'NH(AUO)-$', 'NI', 'NI',
716		'NICHTSAHNEN-----', 'NIX ', 'NIX ',
717		'NICHTSSAGE----', 'NIX ', 'NIX ',
718		'NICHTS^^', 'NIX', 'NIX',
719		'NICHT^^', 'NICHT', 'NIKT',
720		'NINE$', 'NIN', 'NIN',
721		'NON^^', 'NON', 'NUN',
722		'NOTLEIDE-----^', 'NOT ', 'NUT ',
723		'NOT^^', 'NOT', 'NUT',
724		'NTI(AIOU)-3', 'NZI', 'NZI',
725		'NTIEL--3', 'NZI', 'NZI',
726		'NT(SßZ)$', 'NS', 'NZ',
727		'NT\'S$', 'NS', 'NZ',
728		'NT´S$', 'NS', 'NZ',
729		'NYLON', 'NEILON', 'NEILUN',
730		'NY9^', 'NÜ', None,
731		'NSTZUNEH---', 'NST ZU ', 'NZT ZU ',
732		'NSZ-', 'NS', None,
733		'NSTS$', 'NS', 'NZ',
734		'NZ(BDFGKLMNPQRSTVWX)-', 'NS', None,
735		'N(SßZ)$', 'NS', None,
736		'OBERE-', 'OBER', None,
737		'OBER^^', 'OBA', 'UBA',
738		'OEU2', 'Ö', 'Ö',
739		'OE<2', 'Ö', 'Ö',
740		'OGL-', 'OK', None,
741		'OGNIE-', 'ONI', 'UNI',
742		'OGN(AEOU)-$', 'ONI', 'UNI',
743		'OH(AIOÖUÜY)-', 'OH', None,
744		'OIE$', 'Ö', 'Ö',
745		'OIRE$', 'OA', 'UA',
746		'OIR$', 'OA', 'UA',
747		'OIX', 'OA', 'UA',
748		'OI<3', 'EU', 'EU',
749		'OKAY^$', 'OKE', 'UKE',
750		'OLYN$', 'OLIN', 'ULIN',
751		'OO(DLMZ)-', 'U', None,
752		'OO$', 'U', None,
753		'OO-', '', '',
754		'ORGINAL-----', 'ORI', 'URI',
755		'OTI(AÄOÖUÜ)-', 'OZI', 'UZI',
756		'OUI^', 'WI', 'FI',
757		'OUILLE$', 'ULIE', 'ULIE',
758		'OU(DT)-^', 'AU', 'AU',
759		'OUSE$', 'AUS', 'AUZ',
760		'OUT-', 'AU', 'AU',
761		'OU', 'U', 'U',
762		'O(FV)$', 'AU', 'AU', # due to 'OW$' -> 'AU'
763		'OVER--<', 'OW', None,
764		'OV(AOU)-', 'OW', None,
765		'OW$', 'AU', 'AU',
766		'OWS$', 'OS', 'UZ',
767		'OJ(AÄEIOÖUÜ)--', 'O', 'U',
768		'OYER', 'OIA', None,
769		'OY(AÄEIOÖUÜ)--', 'O', 'U',
770		'O(JY)<', 'EU', 'EU',
771		'OZ$', 'OS', None,
772		'O´^', 'O', 'U',
773		'O\'^', 'O', 'U',
774		'O', None, 'U',
775		'PATIEN--^', 'PAZI', 'PAZI',
776		'PENSIO-^', 'PANSI', 'PANZI',
777		'PE(LMNRST)-3^', 'PE', 'PE',
778		'PFER-^', 'FE', 'FE',
779		'P(FH)<', 'F', 'F',
780		'PIC^$', 'PIK', 'PIK',
781		'PIC$', 'PIZ', 'PIZ',
782		'PIPELINE', 'PEIBLEIN', 'PEIBLEIN',
783		'POLYP-', 'POLÜ', None,
784		'POLY^^', 'POLI', 'PULI',
785		'PORTRAIT7', 'PORTRE', 'PURTRE',
786		'POWER7', 'PAUA', 'PAUA',
787		'PP(FH)--<', 'B', 'B',
788		'PP-', '', '',
789		'PRODUZ-^', 'PRODU', 'BRUTU',
790		'PRODUZI--', ' PRODU', ' BRUTU',
791		'PRIX^$', 'PRI', 'PRI',
792		'PS-^^', 'P', None,
793		'P(SßZ)^', None, 'Z',
794		'P(SßZ)$', 'BS', None,
795		'PT-^', '', '',
796		'PTI(AÄOÖUÜ)-3', 'BZI', 'BZI',
797		'PY9^', 'PÜ', None,
798		'P(AÄEIOÖRUÜY)-', 'P', 'P',
799		'P(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'P', None,
800		'P.^', None, 'P.',
801		'P^', 'P', None,
802		'P', 'B', 'B',
803		'QI-', 'Z', 'Z',
804		'QUARANT--', 'KARA', 'KARA',
805		'QUE(LMNRST)-3', 'KWE', 'KFE',
806		'QUE$', 'K', 'K',
807		'QUI(NS)$', 'KI', 'KI',
808		'QUIZ7', 'KWIS', None,
809		'Q(UV)7', 'KW', 'KF',
810		'Q<', 'K', 'K',
811		'RADFAHR----', 'RAT ', 'RAT ',
812		'RAEFTEZEHRE-----', 'REFTE ', 'REFTE ',
813		'RCH', 'RCH', 'RK',
814		'REA(DU)---3^', 'R', None,
815		'REBSERZEUG------', 'REBS ', 'REBZ ',
816		'RECHERCH^', 'RESHASH', 'REZAZ',
817		'RECYCL--', 'RIZEI', 'RIZEI',
818		'RE(ALST)-3^', 'RE', None,
819		'REE$', 'RI', 'RI',
820		'RER$', 'RA', 'RA',
821		'RE(MNR)-4', 'RE', 'RE',
822		'RETTE$', 'RET', 'RET',
823		'REUZ$', 'REUZ', None,
824		'REW$', 'RU', 'RU',
825		'RH<^', 'R', 'R',
826		'RJA(MN)--', 'RI', 'RI',
827		'ROWD-^', 'RAU', 'RAU',
828		'RTEMONNAIE-', 'RTMON', 'RTNUN',
829		'RTI(AÄOÖUÜ)-3', 'RZI', 'RZI',
830		'RTIEL--3', 'RZI', 'RZI',
831		'RV(AEOU)-3', 'RW', None,
832		'RY(KN)-$', 'RI', 'RI',
833		'RY9^', 'RÜ', None,
834		'RÄFTEZEHRE-----', 'REFTE ', 'REFTE ',
835		'SAISO-^', 'SES', 'ZEZ',
836		'SAFE^$', 'SEIF', 'ZEIF',
837		'SAUCE-^', 'SOS', 'ZUZ',
838		'SCHLAGGEBEN-----<', 'SHLAK ', 'ZLAK ',
839		'SCHSCH---7', '', '',
840		'SCHTSCH', 'SH', 'Z',
841		'SC(HZ)<', 'SH', 'Z',
842		'SC', 'SK', 'ZK',
843		'SELBSTST--7^^', 'SELB', 'ZELB',
844		'SELBST7^^', 'SELBST', 'ZELBZT',
845		'SERVICE7^', 'SÖRWIS', 'ZÖRFIZ',
846		'SERVI-^', 'SERW', None,
847		'SE(LMNRST)-3^', 'SE', 'ZE',
848		'SETTE$', 'SET', 'ZET',
849		'SHP-^', 'S', 'Z',
850		'SHST', 'SHT', 'ZT',
851		'SHTSH', 'SH', 'Z',
852		'SHT', 'ST', 'Z',
853		'SHY9^', 'SHÜ', None,
854		'SH^^', 'SH', None,
855		'SH3', 'SH', 'Z',
856		'SICHERGEGAN-----^', 'SICHA ', 'ZIKA ',
857		'SICHERGEHE----^', 'SICHA ', 'ZIKA ',
858		'SICHERGESTEL------^', 'SICHA ', 'ZIKA ',
859		'SICHERSTELL-----^', 'SICHA ', 'ZIKA ',
860		'SICHERZU(GS)--^', 'SICHA ZU ', 'ZIKA ZU ',
861		'SIEGLI-^', 'SIKL', 'ZIKL',
862		'SIGLI-^', 'SIKL', 'ZIKL',
863		'SIGHT', 'SEIT', 'ZEIT',
864		'SIGN', 'SEIN', 'ZEIN',
865		'SKI(NPZ)-', 'SKI', 'ZKI',
866		'SKI<^', 'SHI', 'ZI',
867		'SODASS^$', 'SO DAS', 'ZU TAZ',
868		'SODAß^$', 'SO DAS', 'ZU TAZ',
869		'SOGENAN--^', 'SO GEN', 'ZU KEN',
870		'SOUND-', 'SAUN', 'ZAUN',
871		'STAATS^^', 'STAZ', 'ZTAZ',
872		'STADT^^', 'STAT', 'ZTAT',
873		'STANDE$', ' STANDE', ' ZTANTE',
874		'START^^', 'START', 'ZTART',
875		'STAURANT7', 'STORAN', 'ZTURAN',
876		'STEAK-', 'STE', 'ZTE',
877		'STEPHEN-^$', 'STEW', None,
878		'STERN', 'STERN', None,
879		'STRAF^^', 'STRAF', 'ZTRAF',
880		'ST\'S$', 'Z', 'Z',
881		'ST´S$', 'Z', 'Z',
882		'STST--', '', '',
883		'STS(ACEÈÉÊHIÌÍÎOUÄÜÖ)--', 'ST', 'ZT',
884		'ST(SZ)', 'Z', 'Z',
885		'SPAREN---^', 'SPA', 'ZPA',
886		'SPAREND----', ' SPA', ' ZPA',
887		'S(PTW)-^^', 'S', None,
888		'SP', 'SP', None,
889		'STYN(AE)-$', 'STIN', 'ZTIN',
890		'ST', 'ST', 'ZT',
891		'SUITE<', 'SIUT', 'ZIUT',
892		'SUKE--$', 'S', 'Z',
893		'SURF(EI)-', 'SÖRF', 'ZÖRF',
894		'SV(AEÈÉÊIÌÍÎOU)-<^', 'SW', None,
895		'SYB(IY)--^', 'SIB', None,
896		'SYL(KVW)--^', 'SI', None,
897		'SY9^', 'SÜ', None,
898		'SZE(NPT)-^', 'ZE', 'ZE',
899		'SZI(ELN)-^', 'ZI', 'ZI',
900		'SZCZ<', 'SH', 'Z',
901		'SZT<', 'ST', 'ZT',
902		'SZ<3', 'SH', 'Z',
903		'SÜL(KVW)--^', 'SI', None,
904		'S', None, 'Z',
905		'TCH', 'SH', 'Z',
906		'TD(AÄEIOÖRUÜY)-', 'T', None,
907		'TD(ÀÁÂÃÅÈÉÊËÌÍÎÏÒÓÔÕØÙÚÛÝŸ)-', 'T', None,
908		'TEAT-^', 'TEA', 'TEA',
909		'TERRAI7^', 'TERA', 'TERA',
910		'TE(LMNRST)-3^', 'TE', 'TE',
911		'TH<', 'T', 'T',
912		'TICHT-', 'TIK', 'TIK',
913		'TICH$', 'TIK', 'TIK',
914		'TIC$', 'TIZ', 'TIZ',
915		'TIGGESTELL-------', 'TIK ', 'TIK ',
916		'TIGSTELL-----', 'TIK ', 'TIK ',
917		'TOAS-^', 'TO', 'TU',
918		'TOILET-', 'TOLE', 'TULE',
919		'TOIN-', 'TOA', 'TUA',
920		'TRAECHTI-^', 'TRECHT', 'TREKT',
921		'TRAECHTIG--', ' TRECHT', ' TREKT',
922		'TRAINI-', 'TREN', 'TREN',
923		'TRÄCHTI-^', 'TRECHT', 'TREKT',
924		'TRÄCHTIG--', ' TRECHT', ' TREKT',
925		'TSCH', 'SH', 'Z',
926		'TSH', 'SH', 'Z',
927		'TST', 'ZT', 'ZT',
928		'T(Sß)', 'Z', 'Z',
929		'TT(SZ)--<', '', '',
930		'TT9', 'T', 'T',
931		'TV^$', 'TV', 'TV',
932		'TX(AEIOU)-3', 'SH', 'Z',
933		'TY9^', 'TÜ', None,
934		'TZ-', '', '',
935		'T\'S3$', 'Z', 'Z',
936		'T´S3$', 'Z', 'Z',
937		'UEBEL(GNRW)-^^', 'ÜBL ', 'IBL ',
938		'UEBER^^', 'ÜBA', 'IBA',
939		'UE2', 'Ü', 'I',
940		'UGL-', 'UK', None,
941		'UH(AOÖUÜY)-', 'UH', None,
942		'UIE$', 'Ü', 'I',
943		'UM^^', 'UM', 'UN',
944		'UNTERE--3', 'UNTE', 'UNTE',
945		'UNTER^^', 'UNTA', 'UNTA',
946		'UNVER^^', 'UNFA', 'UNFA',
947		'UN^^', 'UN', 'UN',
948		'UTI(AÄOÖUÜ)-', 'UZI', 'UZI',
949		'UVE-4', 'UW', None,
950		'UY2', 'UI', None,
951		'UZZ', 'AS', 'AZ',
952		'VACL-^', 'WAZ', 'FAZ',
953		'VAC$', 'WAZ', 'FAZ',
954		'VAN DEN ^', 'FANDN', 'FANTN',
955		'VANES-^', 'WANE', None,
956		'VATRO-', 'WATR', None,
957		'VA(DHJNT)--^', 'F', None,
958		'VEDD-^', 'FE', 'FE',
959		'VE(BEHIU)--^', 'F', None,
960		'VEL(BDLMNT)-^', 'FEL', None,
961		'VENTZ-^', 'FEN', None,
962		'VEN(NRSZ)-^', 'FEN', None,
963		'VER(AB)-^$', 'WER', None,
964		'VERBAL^$', 'WERBAL', None,
965		'VERBAL(EINS)-^', 'WERBAL', None,
966		'VERTEBR--', 'WERTE', None,
967		'VEREIN-----', 'F', None,
968		'VEREN(AEIOU)-^', 'WEREN', None,
969		'VERIFI', 'WERIFI', None,
970		'VERON(AEIOU)-^', 'WERON', None,
971		'VERSEN^', 'FERSN', 'FAZN',
972		'VERSIERT--^', 'WERSI', None,
973		'VERSIO--^', 'WERS', None,
974		'VERSUS', 'WERSUS', None,
975		'VERTI(GK)-', 'WERTI', None,
976		'VER^^', 'FER', 'FA',
977		'VERSPRECHE-------', ' FER', ' FA',
978		'VER$', 'WA', None,
979		'VER', 'FA', 'FA',
980		'VET(HT)-^', 'FET', 'FET',
981		'VETTE$', 'WET', 'FET',
982		'VE^', 'WE', None,
983		'VIC$', 'WIZ', 'FIZ',
984		'VIELSAGE----', 'FIL ', 'FIL ',
985		'VIEL', 'FIL', 'FIL',
986		'VIEW', 'WIU', 'FIU',
987		'VILL(AE)-', 'WIL', None,
988		'VIS(ACEIKUVWZ)-<^', 'WIS', None,
989		'VI(ELS)--^', 'F', None,
990		'VILLON--', 'WILI', 'FILI',
991		'VIZE^^', 'FIZE', 'FIZE',
992		'VLIE--^', 'FL', None,
993		'VL(AEIOU)--', 'W', None,
994		'VOKA-^', 'WOK', None,
995		'VOL(ATUVW)--^', 'WO', None,
996		'VOR^^', 'FOR', 'FUR',
997		'VR(AEIOU)--', 'W', None,
998		'VV9', 'W', None,
999		'VY9^', 'WÜ', 'FI',
1000		'V(ÜY)-', 'W', None,
1001		'V(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'W', None,
1002		'V(AEIJLRU)-<', 'W', None,
1003		'V.^', 'V.', None,
1004		'V<', 'F', 'F',
1005		'WEITERENTWI-----^', 'WEITA ', 'FEITA ',
1006		'WEITREICH-----^', 'WEIT ', 'FEIT ',
1007		'WEITVER^', 'WEIT FER', 'FEIT FA',
1008		'WE(LMNRST)-3^', 'WE', 'FE',
1009		'WER(DST)-', 'WER', None,
1010		'WIC$', 'WIZ', 'FIZ',
1011		'WIEDERU--', 'WIDE', 'FITE',
1012		'WIEDER^$', 'WIDA', 'FITA',
1013		'WIEDER^^', 'WIDA ', 'FITA ',
1014		'WIEVIEL', 'WI FIL', 'FI FIL',
1015		'WISUEL', 'WISUEL', None,
1016		'WR-^', 'W', None,
1017		'WY9^', 'WÜ', 'FI',
1018		'W(BDFGJKLMNPQRSTZ)-', 'F', None,
1019		'W$', 'F', None,
1020		'W', None, 'F',
1021		'X<^', 'Z', 'Z',
1022		'XHAVEN$', 'XAFN', None,
1023		'X(CSZ)', 'X', 'X',
1024		'XTS(CH)--', 'XT', 'XT',
1025		'XT(SZ)', 'Z', 'Z',
1026		'YE(LMNRST)-3^', 'IE', 'IE',
1027		'YE-3', 'I', 'I',
1028		'YOR(GK)^$', 'IÖRK', 'IÖRK',
1029		'Y(AOU)-<7', 'I', 'I',
1030		'Y(BKLMNPRSTX)-1', 'Ü', None,
1031		'YVES^$', 'IF', 'IF',
1032		'YVONNE^$', 'IWON', 'IFUN',
1033		'Y.^', 'Y.', None,
1034		'Y', 'I', 'I',
1035		'ZC(AOU)-', 'SK', 'ZK',
1036		'ZE(LMNRST)-3^', 'ZE', 'ZE',
1037		'ZIEJ$', 'ZI', 'ZI',
1038		'ZIGERJA(HR)-3', 'ZIGA IA', 'ZIKA IA',
1039		'ZL(AEIOU)-', 'SL', None,
1040		'ZS(CHT)--', '', '',
1041		'ZS', 'SH', 'Z',
1042		'ZUERST', 'ZUERST', 'ZUERST',
1043		'ZUGRUNDE^$', 'ZU GRUNDE', 'ZU KRUNTE',
1044		'ZUGRUNDE', 'ZU GRUNDE ', 'ZU KRUNTE ',
1045		'ZUGUNSTEN', 'ZU GUNSTN', 'ZU KUNZTN',
1046		'ZUHAUSE-', 'ZU HAUS', 'ZU AUZ',
1047		'ZULASTEN^$', 'ZU LASTN', 'ZU LAZTN',
1048		'ZURUECK^^', 'ZURÜK', 'ZURIK',
1049		'ZURZEIT', 'ZUR ZEIT', 'ZUR ZEIT',
1050		'ZURÜCK^^', 'ZURÜK', 'ZURIK',
1051		'ZUSTANDE', 'ZU STANDE', 'ZU ZTANTE',
1052		'ZUTAGE', 'ZU TAGE', 'ZU TAKE',
1053		'ZUVER^^', 'ZUFA', 'ZUFA',
1054		'ZUVIEL', 'ZU FIL', 'ZU FIL',
1055		'ZUWENIG', 'ZU WENIK', 'ZU FENIK',
1056		'ZY9^', 'ZÜ', None,
1057		'ZYK3$', 'ZIK', None,
1058		'Z(VW)7^', 'SW', None,
1059		None, None, None
1060		# fmt: on
1061		)
1062
# Translation table for ``str.translate``: maps the code point of each
# supported lower-case letter to its upper-case form.  'ß' has no
# upper-case counterpart here and maps to itself.
_upper_trans = {
    ord(lower): upper
    for lower, upper in zip(
        'abcdefghijklmnopqrstuvwxyzàáâãåäæ'
        + 'çðèéêëìíîïñòóôõöøœšßþùúûüýÿ',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÅÄÆ'
        + 'ÇÐÈÉÊËÌÍÎÏÑÒÓÔÕÖØŒŠßÞÙÚÛÜÝŸ',
    )
}
1074
def encode(self, word, mode=1, lang='de'):
    """Return the phonet code for a word.

    The word is NFKC-normalized, upper-cased through ``self._upper_trans``
    and then rewritten by the rule table selected by ``lang``.

    Args:
        word (str): The word to transform
        mode (int): The phonet variant to employ (1 or 2); it selects
            which of the two replacement columns of the rule table is used
        lang (str): 'de' (default) for German, 'none' for no language

    Returns:
        str: The phonet value

    Examples:
        >>> pe = Phonet()
        >>> pe.encode('Christopher')
        'KRISTOFA'
        >>> pe.encode('Niall')
        'NIAL'
        >>> pe.encode('Smith')
        'SMIT'
        >>> pe.encode('Schmidt')
        'SHMIT'

        >>> pe.encode('Christopher', mode=2)
        'KRIZTUFA'
        >>> pe.encode('Niall', mode=2)
        'NIAL'
        >>> pe.encode('Smith', mode=2)
        'ZNIT'
        >>> pe.encode('Schmidt', mode=2)
        'ZNIT'

        >>> pe.encode('Christopher', lang='none')
        'CHRISTOPHER'
        >>> pe.encode('Niall', lang='none')
        'NIAL'
        >>> pe.encode('Smith', lang='none')
        'SMITH'
        >>> pe.encode('Schmidt', lang='none')
        'SCHMIDT'

    """
    # The rule table is a flat sequence of triples:
    # (search pattern, mode-1 replacement, mode-2 replacement).
    # A replacement of None means "no rule for this mode".
    if lang == 'none':
        rules = self._rules_no_lang
    else:
        rules = self._rules_german

    # first character of a pattern -> index of a rule starting with it;
    # always read with .get(key, -1) because 0 is a valid rule index and
    # a plain Counter lookup would report 0 for characters never seen
    phonet_hash = Counter()
    # character -> 0 (anything else), 1 (umlaut etc.), 2..27 ('A'..'Z')
    alpha_pos = Counter()
    # (letter index, alpha_pos of the following char) -> first/last index
    # of the rules to try; column 0 means "any following character"
    phonet_hash_1 = Counter()
    phonet_hash_2 = Counter()

    def _initialize_phonet():
        """Fill the lookup tables from the selected rule table."""
        phonet_hash[''] = -1

        # German and international umlauts
        for letter in 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßŒŠŸ':
            alpha_pos[letter] = 1
            phonet_hash[letter] = -1

        # "normal" letters ('A'-'Z')
        for offset, letter in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
            alpha_pos[letter] = offset + 2
            phonet_hash[letter] = -1

        for row in range(26):
            for col in range(28):
                phonet_hash_1[row, col] = -1
                phonet_hash_2[row, col] = -1

        # for each phonetic rule
        for i, rule in enumerate(rules):
            # only every third entry is a search pattern
            if not rule or i % 3:
                continue

            # calculate first hash value
            first = rule[0]
            if phonet_hash.get(first, -1) < 0 and (
                rules[i + 1] or rules[i + 2]
            ):
                phonet_hash[first] = i

            # calculate second hash values (patterns starting 'A'-'Z')
            if first and alpha_pos[first] >= 2:
                row = alpha_pos[first] - 2
                followers = rule[1:]

                if not followers:
                    followers = ' '
                elif followers[0] == '(':
                    followers = followers[1:]
                else:
                    followers = followers[0]

                while followers and followers[0] != ')':
                    col = alpha_pos[followers[0]]

                    if col > 0:
                        # add hash value for this letter
                        if phonet_hash_1[row, col] < 0:
                            phonet_hash_1[row, col] = i
                            phonet_hash_2[row, col] = i

                        if phonet_hash_2[row, col] >= (i - 30):
                            phonet_hash_2[row, col] = i
                        else:
                            col = -1

                    if col <= 0:
                        # add hash value for all letters
                        if phonet_hash_1[row, 0] < 0:
                            phonet_hash_1[row, 0] = i

                        phonet_hash_2[row, 0] = i

                    followers = followers[1:]

    def _phonet(term, mode):
        """Return the phonet coded form of a term.

        Args:
            term (str): Term to transform
            mode (int): The phonet variant to employ (1 or 2)

        Returns:
            str: The phonet value

        """
        if not term:
            return ''

        # "dest" is overwritten in place; only dest[0:j] is meaningful
        dest = term
        term_length = len(term)

        # convert input string to upper-case
        src = term.translate(self._upper_trans)

        # check "src"
        i = 0  # read position in src
        j = 0  # write position in dest
        zeta = 0  # set while a '<' rule has just rewritten src at i

        while i < len(src):
            char = src[i]
            pos = alpha_pos[char]

            if pos >= 2:
                xpos = pos - 2

                if i + 1 == len(src):
                    pos = alpha_pos['']
                else:
                    pos = alpha_pos[src[i + 1]]

                start1 = phonet_hash_1[xpos, pos]
                start2 = phonet_hash_1[xpos, 0]
                end1 = phonet_hash_2[xpos, pos]
                end2 = phonet_hash_2[xpos, 0]

                # preserve rule priorities
                if (start2 >= 0) and ((start1 < 0) or (start2 < start1)):
                    start1, start2 = start2, start1
                    end1, end2 = end2, end1

                if (end1 >= start2) and (start2 >= 0):
                    if end2 > end1:
                        end1 = end2

                    start2 = -1
                    end2 = -1
            else:
                # missing key must mean "no rule" (-1), not rule index 0
                start1 = phonet_hash.get(char, -1)
                end1 = 10000
                start2 = -1
                end2 = -1

            pos = start1
            zeta0 = 0  # set when a rule already consumed this position

            if pos >= 0:
                # check rules for this char
                while (rules[pos] is None) or (rules[pos][0] == char):
                    if pos > end1:
                        if start2 > 0:
                            pos = start2
                            start1 = start2
                            start2 = -1
                            end1 = end2
                            end2 = -1
                            continue

                        break

                    if (rules[pos] is None) or (rules[pos + mode] is None):
                        # no conversion rule available
                        pos += 3
                        continue

                    # check whole string
                    matches = 1  # number of matching letters
                    priority = 5  # default priority
                    rule = rules[pos][1:]

                    # Literal part of the pattern.  The test is on the
                    # next pattern character, so control characters are
                    # never consumed as literal matches.
                    while (
                        rule
                        and (len(src) > (i + matches))
                        and (src[i + matches] == rule[0])
                        and not rule[0].isdigit()
                        and (rule[0] not in '(-<^$')
                    ):
                        matches += 1
                        rule = rule[1:]

                    if rule and (rule[0] == '('):
                        # check an array of letters
                        if (
                            (len(src) > (i + matches))
                            and src[i + matches].isalpha()
                            and (src[i + matches] in rule[1:])
                        ):
                            matches += 1

                            while rule and rule[0] != ')':
                                rule = rule[1:]

                            # drop the closing ')'
                            rule = rule[1:]

                    if rule:
                        priority0 = ord(rule[0])
                    else:
                        priority0 = 0

                    matches0 = matches

                    # each '-' gives one matched letter back to the input
                    while rule and rule[0] == '-' and matches > 1:
                        matches -= 1
                        rule = rule[1:]

                    if rule and rule[0] == '<':
                        rule = rule[1:]

                    if rule and rule[0].isdigit():
                        # read priority
                        priority = int(rule[0])
                        rule = rule[1:]

                    if rule and rule[0:2] == '^^':
                        rule = rule[1:]

                    # character right after the full match ('' at the end)
                    following = src[i + matches0 : i + matches0 + 1]
                    at_word_end = (not following.isalpha()) and (
                        following != '.'
                    )

                    if (
                        not rule
                        or (
                            (rule[0] == '^')
                            and ((i == 0) or not src[i - 1].isalpha())
                            and ((rule[1:2] != '$') or at_word_end)
                        )
                        or (
                            (rule[0] == '$')
                            and (i > 0)
                            and src[i - 1].isalpha()
                            and at_word_end
                        )
                    ):
                        # look for continuation, if:
                        # matches > 1 and NO '-' in first string
                        pos0 = -1

                        start3 = 0
                        start4 = 0
                        end3 = 0
                        end4 = 0

                        if (
                            (matches > 1)
                            and src[i + matches : i + matches + 1]
                            and (priority0 != ord('-'))
                        ):
                            char0 = src[i + matches - 1]
                            pos0 = alpha_pos[char0]

                            # src[i + matches] exists (checked just above)
                            if pos0 >= 2:
                                xpos0 = pos0 - 2
                                pos0 = alpha_pos[src[i + matches]]
                                start3 = phonet_hash_1[xpos0, pos0]
                                start4 = phonet_hash_1[xpos0, 0]
                                end3 = phonet_hash_2[xpos0, pos0]
                                end4 = phonet_hash_2[xpos0, 0]

                                # preserve rule priorities
                                if (start4 >= 0) and (
                                    (start3 < 0) or (start4 < start3)
                                ):
                                    start3, start4 = start4, start3
                                    end3, end4 = end4, end3

                                if (end3 >= start4) and (start4 >= 0):
                                    if end4 > end3:
                                        end3 = end4

                                    start4 = -1
                                    end4 = -1
                            else:
                                start3 = phonet_hash.get(char0, -1)
                                end3 = 10000
                                start4 = -1
                                end4 = -1

                            pos0 = start3

                            # check continuation rules for src[i+matches]
                            if pos0 >= 0:
                                while (rules[pos0] is None) or (
                                    rules[pos0][0] == char0
                                ):
                                    if pos0 > end3:
                                        if start4 > 0:
                                            pos0 = start4
                                            start3 = start4
                                            start4 = -1
                                            end3 = end4
                                            end4 = -1
                                            continue

                                        priority0 = -1

                                        # important
                                        break

                                    if (rules[pos0] is None) or (
                                        rules[pos0 + mode] is None
                                    ):
                                        # no conversion rule available
                                        pos0 += 3
                                        continue

                                    # check whole string
                                    matches0 = matches
                                    priority0 = 5
                                    rule = rules[pos0][1:]

                                    while (
                                        rule
                                        and (
                                            src[
                                                i + matches0 : i + matches0 + 1
                                            ]
                                            == rule[0]
                                        )
                                        and not rule[0].isdigit()
                                        and (rule[0] not in '(-<^$')
                                    ):
                                        matches0 += 1
                                        rule = rule[1:]

                                    if rule and rule[0] == '(':
                                        # check an array of letters
                                        nxt = src[
                                            i + matches0 : i + matches0 + 1
                                        ]
                                        if nxt.isalpha() and (nxt in rule[1:]):
                                            matches0 += 1

                                            while rule and rule[0] != ')':
                                                rule = rule[1:]

                                            # drop the closing ')'
                                            rule = rule[1:]

                                    while rule and rule[0] == '-':
                                        # "matches0" is NOT decremented
                                        # because of
                                        # "if (matches0 == matches)"
                                        rule = rule[1:]

                                    if rule and rule[0] == '<':
                                        rule = rule[1:]

                                    if rule and rule[0].isdigit():
                                        priority0 = int(rule[0])
                                        rule = rule[1:]

                                    tail = src[i + matches0 : i + matches0 + 1]

                                    # rule == '^' is not possible here
                                    if not rule or (
                                        (rule[0] == '$')
                                        and not tail.isalpha()
                                        and (tail != '.')
                                    ):
                                        if matches0 == matches:
                                            # this is only a partial string
                                            pos0 += 3
                                            continue

                                        if priority0 < priority:
                                            # priority is too low
                                            pos0 += 3
                                            continue

                                        # continuation rule found
                                        break

                                    pos0 += 3

                                # end of "while": a continuation rule of
                                # sufficient priority wins, so skip the
                                # current rule
                                if (priority0 >= priority) and (
                                    (rules[pos0] is not None)
                                    and (rules[pos0][0] == char0)
                                ):
                                    pos += 3
                                    continue

                        # replace string
                        if rules[pos] and ('<' in rules[pos][1:]):
                            priority0 = 1
                        else:
                            priority0 = 0

                        rule = rules[pos + mode]

                        if (priority0 == 1) and (zeta == 0):
                            # rule with '<' is applied: the replacement is
                            # written back into src and scanned again
                            if (
                                (j > 0)
                                and rule
                                and (
                                    (dest[j - 1] == char)
                                    or (dest[j - 1] == rule[0])
                                )
                            ):
                                j -= 1

                            zeta0 = 1
                            zeta += 1
                            matches0 = 0

                            # stop at the end of src instead of indexing
                            # past it when the replacement is longer than
                            # the rest of the word
                            while rule and (i + matches0) < len(src):
                                src = (
                                    src[0 : i + matches0]
                                    + rule[0]
                                    + src[i + matches0 + 1 :]
                                )
                                matches0 += 1
                                rule = rule[1:]

                            if matches0 < matches:
                                src = src[0 : i + matches0] + src[i + matches :]

                            # src may now end at i (empty replacement)
                            char = src[i] if i < len(src) else ''
                        else:
                            i = i + matches - 1
                            zeta = 0

                            # emit all but the last replacement character
                            while len(rule) > 1:
                                if (j == 0) or (dest[j - 1] != rule[0]):
                                    dest = (
                                        dest[0:j]
                                        + rule[0]
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                rule = rule[1:]

                            # new "current char"
                            char = rule[0] if rule else ''

                            if rules[pos] and '^^' in rules[pos][1:]:
                                if char:
                                    dest = (
                                        dest[0:j]
                                        + char
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                # restart on the remainder of the word
                                src = src[i + 1 :]
                                i = 0
                                zeta0 = 1

                        break

                    pos += 3

                    if pos > end1 and start2 > 0:
                        pos = start2
                        start1 = start2
                        end1 = end2
                        start2 = -1
                        end2 = -1

            if zeta0 == 0:
                if char and ((j == 0) or (dest[j - 1] != char)):
                    # delete multiple letters only
                    dest = dest[0:j] + char + dest[min(j + 1, term_length) :]
                    j += 1

                i += 1
                zeta = 0

        return dest[0:j]

    _initialize_phonet()

    word = unicode_normalize('NFKC', text_type(word))
    return _phonet(word, mode)
1708
1709
def phonet(word, mode=1, lang='de'):
    """Compute the phonet encoding of a word.

    Convenience function: it creates a :py:class:`Phonet` instance and
    delegates to :py:meth:`Phonet.encode`.

    Args:
        word (str): The word to transform
        mode (int): The phonet variant to employ (1 or 2)
        lang (str): 'de' (default) for German, 'none' for no language

    Returns:
        str: The phonet value

    Examples:
        >>> phonet('Christopher')
        'KRISTOFA'
        >>> phonet('Niall')
        'NIAL'
        >>> phonet('Smith')
        'SMIT'
        >>> phonet('Schmidt')
        'SHMIT'

        >>> phonet('Christopher', mode=2)
        'KRIZTUFA'
        >>> phonet('Niall', mode=2)
        'NIAL'
        >>> phonet('Smith', mode=2)
        'ZNIT'
        >>> phonet('Schmidt', mode=2)
        'ZNIT'

        >>> phonet('Christopher', lang='none')
        'CHRISTOPHER'
        >>> phonet('Niall', lang='none')
        'NIAL'
        >>> phonet('Smith', lang='none')
        'SMITH'
        >>> phonet('Schmidt', lang='none')
        'SCHMIDT'

    """
    encoder = Phonet()
    return encoder.encode(word, mode, lang)
1753
1754
# Run the module's doctests when it is executed as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()
1759

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._Phonet.Phonet.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like