| 1 |  |  | # -*- coding: utf-8 -*- | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | # Copyright 2014-2018 by Christopher C. Little. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | # This file is part of Abydos. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | # Abydos is free software: you can redistribute it and/or modify | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | # it under the terms of the GNU General Public License as published by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | # the Free Software Foundation, either version 3 of the License, or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | # (at your option) any later version. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | # Abydos is distributed in the hope that it will be useful, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | # GNU General Public License for more details. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | # You should have received a copy of the GNU General Public License | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | """abydos.phonetic. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | The phonetic module implements phonetic algorithms including: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     - Robert C. Russell's Index | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     - American Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     - Refined Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     - Daitch-Mokotoff Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     - Kölner Phonetik | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     - NYSIIS | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |     - Match Rating Algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     - Metaphone | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |     - Double Metaphone | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     - Caverphone | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |     - Alpha Search Inquiry System | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     - Fuzzy Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |     - Phonex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |     - Phonem | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     - Phonix | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     - SfinxBis | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     - phonet | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     - Standardized Phonetic Frequency Code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     - Statistics Canada | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     - Lein | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     - Roger Root | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     - Oxford Name Compression Algorithm (ONCA) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     - Eudex phonetic hash | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     - Haase Phonetik | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     - Reth-Schek Phonetik | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     - FONEM | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     - Parmar-Kumbharana | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     - Davidson's Consonant Code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     - SoundD | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |     - PSHP Soundex/Viewex Coding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |     - an early version of Henry Code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |     - Norphone | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |     - Dolby Code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |     - Phonetic Spanish | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     - Spanish Metaphone | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     - MetaSoundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     - Beider-Morse Phonetic Matching | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  | from __future__ import division, unicode_literals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  | from collections import Counter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  | from itertools import groupby, product | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  | from re import compile as re_compile | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  | from unicodedata import normalize | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  | from six import text_type | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  | from six.moves import range | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  | from ._bm import _bmpm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  | _INFINITY = float('inf') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  | def _delete_consecutive_repeats(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     """Delete consecutive repeated characters in a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |     :returns: word with consecutive repeating characters collapsed to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         a single instance | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |     return ''.join(char for char, _ in groupby(word)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  | def russell_index(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |     """Return the Russell Index (integer output) of a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |     This follows Robert C. Russell's Index algorithm, as described in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |     :cite:`Russell:1917`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |     :returns: the Russell Index value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |     :rtype: int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |     >>> russell_index('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |     3813428 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |     >>> russell_index('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |     715 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |     >>> russell_index('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |     3614 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |     >>> russell_index('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |     3614 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |     _russell_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                                      'ABCDEFGIKLMNOPQRSTUVXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                                     '12341231356712383412313')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |     word = word.replace('GH', '')  # discard gh (rule 3) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |     word = word.rstrip('SZ')  # discard /[sz]$/ (rule 3) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |     # translate according to Russell's mapping | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'K', 'L', 'M', 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |                     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'X', 'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |     sdx = word.translate(_russell_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |     # remove any 1s after the first occurrence | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |     one = sdx.find('1')+1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |     if one: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |         sdx = sdx[:one] + ''.join(c for c in sdx[one:] if c != '1') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |     # remove repeating characters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |     sdx = _delete_consecutive_repeats(sdx) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |     # return as an int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |     return int(sdx) if sdx else float('NaN') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  | def russell_index_num_to_alpha(num): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |     """Convert the Russell Index integer to an alphabetic string. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |     This follows Robert C. Russell's Index algorithm, as described in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |     :cite:`Russell:1917`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |     :param int num: a Russell Index integer value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |     :returns: the Russell Index as an alphabetic string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |     >>> russell_index_num_to_alpha(3813428) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |     'CRACDBR' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |     >>> russell_index_num_to_alpha(715) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |     'NAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |     >>> russell_index_num_to_alpha(3614) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |     'CMAD' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |     _russell_num_translation = dict(zip((ord(_) for _ in '12345678'), | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |                                         'ABCDLMNR')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |     num = ''.join(c for c in text_type(num) if c in {'1', '2', '3', '4', '5', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |                                                      '6', '7', '8'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |     if num: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |         return num.translate(_russell_num_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |     return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  | def russell_index_alpha(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |     """Return the Russell Index (alphabetic output) for the word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |     This follows Robert C. Russell's Index algorithm, as described in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |     :cite:`Russell:1917`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |     :returns: the Russell Index value as an alphabetic string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |     >>> russell_index_alpha('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |     'CRACDBR' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |     >>> russell_index_alpha('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |     'NAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |     >>> russell_index_alpha('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |     'CMAD' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |     >>> russell_index_alpha('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |     'CMAD' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |     if word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |         return russell_index_num_to_alpha(russell_index(word)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |     return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  | def soundex(word, maxlength=4, var='American', reverse=False, zero_pad=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |     """Return the Soundex code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |     :param int maxlength: the length of the code returned (defaults to 4) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |     :param str var: the variant of the algorithm to employ (defaults to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |         'American'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |         - 'American' follows the American Soundex algorithm, as described at | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |           :cite:`US:2007` and in :cite:`Knuth:1998`; this is also called | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |           Miracode | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |         - 'special' follows the rules from the 1880-1910 US Census | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |           retrospective re-analysis, in which h & w are not treated as blocking | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |           consonants but as vowels. Cf. :cite:`Repici:2013`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |         - 'Census' follows the rules laid out in GIL 55 :cite:`US:1997` by the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |           US Census, including coding prefixed and unprefixed versions of some | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |           names | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |     :param bool reverse: reverse the word before computing the selected Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |         (defaults to False); This results in "Reverse Soundex" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |     :param bool zero_pad: pad the end of the return value with 0s to achieve a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |         maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  |     :returns: the Soundex value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |     >>> soundex("Christopher") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |     'C623' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |     >>> soundex("Niall") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |     'N400' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 |  |  |     >>> soundex('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  |     'S530' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 215 |  |  |     >>> soundex('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  |     'S530' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 217 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 218 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 219 |  |  |     >>> soundex('Christopher', maxlength=_INFINITY) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 220 |  |  |     'C623160000000000000000000000000000000000000000000000000000000000' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 221 |  |  |     >>> soundex('Christopher', maxlength=_INFINITY, zero_pad=False) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 222 |  |  |     'C62316' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 223 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 224 |  |  |     >>> soundex('Christopher', reverse=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 225 |  |  |     'R132' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 226 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 227 |  |  |     >>> soundex('Ashcroft') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 228 |  |  |     'A261' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 229 |  |  |     >>> soundex('Asicroft') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 230 |  |  |     'A226' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 231 |  |  |     >>> soundex('Ashcroft', var='special') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 232 |  |  |     'A226' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 233 |  |  |     >>> soundex('Asicroft', var='special') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 234 |  |  |     'A226' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 235 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 236 |  |  |     _soundex_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 237 |  |  |                                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 238 |  |  |                                     '01230129022455012623019202')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 239 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 240 |  |  |     # Require a maxlength of at least 4 and not more than 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 241 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 242 |  |  |         maxlength = min(max(4, maxlength), 64) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 243 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 244 |  |  |         maxlength = 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 245 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 246 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z out | 
            
                                                                                                            
                            
            
                                    
            
            
                | 247 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 248 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 249 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 250 |  |  |     if var == 'Census': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 251 |  |  |         # Should these prefixes be supplemented? (VANDE, DELA, VON) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 252 |  |  |         if word[:3] in {'VAN', 'CON'} and len(word) > 4: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 253 |  |  |             return (soundex(word, maxlength, 'American', reverse, zero_pad), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 254 |  |  |                     soundex(word[3:], maxlength, 'American', reverse, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 255 |  |  |                             zero_pad)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 256 |  |  |         if word[:2] in {'DE', 'DI', 'LA', 'LE'} and len(word) > 3: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 257 |  |  |             return (soundex(word, maxlength, 'American', reverse, zero_pad), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 258 |  |  |                     soundex(word[2:], maxlength, 'American', reverse, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 259 |  |  |                             zero_pad)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 260 |  |  |         # Otherwise, proceed as usual (var='American' mode, ostensibly) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 261 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 262 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 263 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 264 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 265 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 266 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 267 |  |  |     # Nothing to convert, return base case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 268 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 269 |  |  |         if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 270 |  |  |             return '0'*maxlength | 
            
                                                                                                            
                            
            
                                    
            
            
                | 271 |  |  |         return '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 272 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 273 |  |  |     # Reverse word if computing Reverse Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 274 |  |  |     if reverse: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 275 |  |  |         word = word[::-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 276 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 277 |  |  |     # apply the Soundex algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 278 |  |  |     sdx = word.translate(_soundex_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 279 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 280 |  |  |     if var == 'special': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 281 |  |  |         sdx = sdx.replace('9', '0')  # special rule for 1880-1910 census | 
            
                                                                                                            
                            
            
                                    
            
            
                | 282 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 283 |  |  |         sdx = sdx.replace('9', '')  # rule 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 284 |  |  |     sdx = _delete_consecutive_repeats(sdx)  # rule 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 285 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 286 |  |  |     if word[0] in 'HW': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 287 |  |  |         sdx = word[0] + sdx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 288 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 289 |  |  |         sdx = word[0] + sdx[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 290 |  |  |     sdx = sdx.replace('0', '')  # rule 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 291 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 292 |  |  |     if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 293 |  |  |         sdx += ('0'*maxlength)  # rule 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 294 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 295 |  |  |     return sdx[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 296 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 297 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 298 |  |  | def refined_soundex(word, maxlength=_INFINITY, reverse=False, zero_pad=False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 299 |  |  |                     retain_vowels=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 300 |  |  |     """Return the Refined Soundex code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 301 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 302 |  |  |     This is Soundex, but with more character classes. It was defined at | 
            
                                                                                                            
                            
            
                                    
            
            
                | 303 |  |  |     :cite:`Boyce:1998`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 304 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 305 |  |  |     :param word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 306 |  |  |     :param maxlength: the length of the code returned (defaults to unlimited) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 307 |  |  |     :param reverse: reverse the word before computing the selected Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 308 |  |  |         (defaults to False); This results in "Reverse Soundex" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 309 |  |  |     :param zero_pad: pad the end of the return value with 0s to achieve a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 310 |  |  |         maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 311 |  |  |     :param retain_vowels: retain vowels (as 0) in the resulting code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 312 |  |  |     :returns: the Refined Soundex value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 313 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 314 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 315 |  |  |     >>> refined_soundex('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 316 |  |  |     'C393619' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 317 |  |  |     >>> refined_soundex('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 318 |  |  |     'N87' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 319 |  |  |     >>> refined_soundex('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 320 |  |  |     'S386' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 321 |  |  |     >>> refined_soundex('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 322 |  |  |     'S386' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 323 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 324 |  |  |     _ref_soundex_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 325 |  |  |                                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 326 |  |  |                                         '01360240043788015936020505')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 327 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 328 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z out | 
            
                                                                                                            
                            
            
                                    
            
            
                | 329 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 330 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 331 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 332 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 333 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 334 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 335 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 336 |  |  |     # Reverse word if computing Reverse Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 337 |  |  |     if reverse: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 338 |  |  |         word = word[::-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 339 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 340 |  |  |     # apply the Soundex algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 341 |  |  |     sdx = word[:1] + word.translate(_ref_soundex_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 342 |  |  |     sdx = _delete_consecutive_repeats(sdx) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 343 |  |  |     if not retain_vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 344 |  |  |         sdx = sdx.replace('0', '')  # Delete vowels, H, W, Y | 
            
                                                                                                            
                            
            
                                    
            
            
                | 345 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 346 |  |  |     if maxlength < _INFINITY: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 347 |  |  |         if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 348 |  |  |             sdx += ('0' * maxlength) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 349 |  |  |         if maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 350 |  |  |             sdx = sdx[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 351 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 352 |  |  |     return sdx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 353 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 354 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 355 |  |  | def dm_soundex(word, maxlength=6, reverse=False, zero_pad=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 356 |  |  |     """Return the Daitch-Mokotoff Soundex code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 357 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 358 |  |  |     Based on Daitch-Mokotoff Soundex :cite:`Mokotoff:1997`, this returns values | 
            
                                                                                                            
                            
            
                                    
            
            
                | 359 |  |  |     of a word as a set. A collection is necessary since there can be multiple | 
            
                                                                                                            
                            
            
                                    
            
            
                | 360 |  |  |     values for a single word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 361 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 362 |  |  |     :param word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 363 |  |  |     :param maxlength: the length of the code returned (defaults to 6) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 364 |  |  |     :param reverse: reverse the word before computing the selected Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 365 |  |  |         (defaults to False); This results in "Reverse Soundex" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 366 |  |  |     :param zero_pad: pad the end of the return value with 0s to achieve a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 367 |  |  |         maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 368 |  |  |     :returns: the Daitch-Mokotoff Soundex value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 369 |  |  |     :rtype: set | 
            
                                                                                                            
                            
            
                                    
            
            
                | 370 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 371 |  |  |     >>> sorted(dm_soundex('Christopher')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 372 |  |  |     ['494379', '594379'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 373 |  |  |     >>> dm_soundex('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 374 |  |  |     {'680000'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 375 |  |  |     >>> dm_soundex('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 376 |  |  |     {'463000'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 377 |  |  |     >>> dm_soundex('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 378 |  |  |     {'463000'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 379 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 380 |  |  |     >>> sorted(dm_soundex('The quick brown fox', maxlength=20, zero_pad=False)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 381 |  |  |     ['35457976754', '3557976754'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 382 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 383 |  |  |     _dms_table = {'STCH': (2, 4, 4), 'DRZ': (4, 4, 4), 'ZH': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 384 |  |  |                   'ZHDZH': (2, 4, 4), 'DZH': (4, 4, 4), 'DRS': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 385 |  |  |                   'DZS': (4, 4, 4), 'SCHTCH': (2, 4, 4), 'SHTSH': (2, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 386 |  |  |                   'SZCZ': (2, 4, 4), 'TZS': (4, 4, 4), 'SZCS': (2, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 387 |  |  |                   'STSH': (2, 4, 4), 'SHCH': (2, 4, 4), 'D': (3, 3, 3), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 388 |  |  |                   'H': (5, 5, '_'), 'TTSCH': (4, 4, 4), 'THS': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 389 |  |  |                   'L': (8, 8, 8), 'P': (7, 7, 7), 'CHS': (5, 54, 54), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 390 |  |  |                   'T': (3, 3, 3), 'X': (5, 54, 54), 'OJ': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 391 |  |  |                   'OI': (0, 1, '_'), 'SCHTSH': (2, 4, 4), 'OY': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 392 |  |  |                   'Y': (1, '_', '_'), 'TSH': (4, 4, 4), 'ZDZ': (2, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 393 |  |  |                   'TSZ': (4, 4, 4), 'SHT': (2, 43, 43), 'SCHTSCH': (2, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 394 |  |  |                   'TTSZ': (4, 4, 4), 'TTZ': (4, 4, 4), 'SCH': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 395 |  |  |                   'TTS': (4, 4, 4), 'SZD': (2, 43, 43), 'AI': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 396 |  |  |                   'PF': (7, 7, 7), 'TCH': (4, 4, 4), 'PH': (7, 7, 7), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 397 |  |  |                   'TTCH': (4, 4, 4), 'SZT': (2, 43, 43), 'ZDZH': (2, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 398 |  |  |                   'EI': (0, 1, '_'), 'G': (5, 5, 5), 'EJ': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 399 |  |  |                   'ZD': (2, 43, 43), 'IU': (1, '_', '_'), 'K': (5, 5, 5), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 400 |  |  |                   'O': (0, '_', '_'), 'SHTCH': (2, 4, 4), 'S': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 401 |  |  |                   'TRZ': (4, 4, 4), 'SHD': (2, 43, 43), 'DSH': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 402 |  |  |                   'CSZ': (4, 4, 4), 'EU': (1, 1, '_'), 'TRS': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 403 |  |  |                   'ZS': (4, 4, 4), 'STRZ': (2, 4, 4), 'UY': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 404 |  |  |                   'STRS': (2, 4, 4), 'CZS': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 405 |  |  |                   'MN': ('6_6', '6_6', '6_6'), 'UI': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 406 |  |  |                   'UJ': (0, 1, '_'), 'UE': (0, '_', '_'), 'EY': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 407 |  |  |                   'W': (7, 7, 7), 'IA': (1, '_', '_'), 'FB': (7, 7, 7), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 408 |  |  |                   'STSCH': (2, 4, 4), 'SCHT': (2, 43, 43), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 409 |  |  |                   'NM': ('6_6', '6_6', '6_6'), 'SCHD': (2, 43, 43), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 410 |  |  |                   'B': (7, 7, 7), 'DSZ': (4, 4, 4), 'F': (7, 7, 7), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 411 |  |  |                   'N': (6, 6, 6), 'CZ': (4, 4, 4), 'R': (9, 9, 9), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 412 |  |  |                   'U': (0, '_', '_'), 'V': (7, 7, 7), 'CS': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 413 |  |  |                   'Z': (4, 4, 4), 'SZ': (4, 4, 4), 'TSCH': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 414 |  |  |                   'KH': (5, 5, 5), 'ST': (2, 43, 43), 'KS': (5, 54, 54), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 415 |  |  |                   'SH': (4, 4, 4), 'SC': (2, 4, 4), 'SD': (2, 43, 43), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 416 |  |  |                   'DZ': (4, 4, 4), 'ZHD': (2, 43, 43), 'DT': (3, 3, 3), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 417 |  |  |                   'ZSH': (4, 4, 4), 'DS': (4, 4, 4), 'TZ': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 418 |  |  |                   'TS': (4, 4, 4), 'TH': (3, 3, 3), 'TC': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 419 |  |  |                   'A': (0, '_', '_'), 'E': (0, '_', '_'), 'I': (0, '_', '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 420 |  |  |                   'AJ': (0, 1, '_'), 'M': (6, 6, 6), 'Q': (5, 5, 5), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 421 |  |  |                   'AU': (0, 7, '_'), 'IO': (1, '_', '_'), 'AY': (0, 1, '_'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 422 |  |  |                   'IE': (1, '_', '_'), 'ZSCH': (4, 4, 4), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 423 |  |  |                   'CH': ((5, 4), (5, 4), (5, 4)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 424 |  |  |                   'CK': ((5, 45), (5, 45), (5, 45)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 425 |  |  |                   'C': ((5, 4), (5, 4), (5, 4)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 426 |  |  |                   'J': ((1, 4), ('_', 4), ('_', 4)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 427 |  |  |                   'RZ': ((94, 4), (94, 4), (94, 4)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 428 |  |  |                   'RS': ((94, 4), (94, 4), (94, 4))} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 429 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 430 |  |  |     _dms_order = {'A': ('AI', 'AJ', 'AU', 'AY', 'A'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 431 |  |  |                   'B': ('B'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 432 |  |  |                   'C': ('CHS', 'CSZ', 'CZS', 'CH', 'CK', 'CS', 'CZ', 'C'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 433 |  |  |                   'D': ('DRS', 'DRZ', 'DSH', 'DSZ', 'DZH', 'DZS', 'DS', 'DT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 434 |  |  |                         'DZ', 'D'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 435 |  |  |                   'E': ('EI', 'EJ', 'EU', 'EY', 'E'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 436 |  |  |                   'F': ('FB', 'F'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 437 |  |  |                   'G': ('G'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 438 |  |  |                   'H': ('H'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 439 |  |  |                   'I': ('IA', 'IE', 'IO', 'IU', 'I'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 440 |  |  |                   'J': ('J'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 441 |  |  |                   'K': ('KH', 'KS', 'K'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 442 |  |  |                   'L': ('L'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 443 |  |  |                   'M': ('MN', 'M'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 444 |  |  |                   'N': ('NM', 'N'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 445 |  |  |                   'O': ('OI', 'OJ', 'OY', 'O'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 446 |  |  |                   'P': ('PF', 'PH', 'P'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 447 |  |  |                   'Q': ('Q'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 448 |  |  |                   'R': ('RS', 'RZ', 'R'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 449 |  |  |                   'S': ('SCHTSCH', 'SCHTCH', 'SCHTSH', 'SHTCH', 'SHTSH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 450 |  |  |                         'STSCH', 'SCHD', 'SCHT', 'SHCH', 'STCH', 'STRS', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 451 |  |  |                         'STRZ', 'STSH', 'SZCS', 'SZCZ', 'SCH', 'SHD', 'SHT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 452 |  |  |                         'SZD', 'SZT', 'SC', 'SD', 'SH', 'ST', 'SZ', 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 453 |  |  |                   'T': ('TTSCH', 'TSCH', 'TTCH', 'TTSZ', 'TCH', 'THS', 'TRS', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 454 |  |  |                         'TRZ', 'TSH', 'TSZ', 'TTS', 'TTZ', 'TZS', 'TC', 'TH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 455 |  |  |                         'TS', 'TZ', 'T'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 456 |  |  |                   'U': ('UE', 'UI', 'UJ', 'UY', 'U'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 457 |  |  |                   'V': ('V'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 458 |  |  |                   'W': ('W'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 459 |  |  |                   'X': ('X'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 460 |  |  |                   'Y': ('Y'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 461 |  |  |                   'Z': ('ZHDZH', 'ZDZH', 'ZSCH', 'ZDZ', 'ZHD', 'ZSH', 'ZD', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 462 |  |  |                         'ZH', 'ZS', 'Z')} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 463 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 464 |  |  |     _vowels = {'A', 'E', 'I', 'J', 'O', 'U', 'Y'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 465 |  |  |     dms = ['']  # initialize empty code list | 
            
                                                                                                            
                            
            
                                    
            
            
                | 466 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 467 |  |  |     # Require a maxlength of at least 6 and not more than 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 468 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 469 |  |  |         maxlength = min(max(6, maxlength), 64) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 470 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 471 |  |  |         maxlength = 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 472 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 473 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z | 
            
                                                                                                            
                            
            
                                    
            
            
                | 474 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 475 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 476 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 477 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 478 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 479 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 480 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 481 |  |  |     # Nothing to convert, return base case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 482 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 483 |  |  |         if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 484 |  |  |             return {'0'*maxlength} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 485 |  |  |         return {'0'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 486 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 487 |  |  |     # Reverse word if computing Reverse Soundex | 
            
                                                                                                            
                            
            
                                    
            
            
                | 488 |  |  |     if reverse: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 489 |  |  |         word = word[::-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 490 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 491 |  |  |     pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 492 |  |  |     while pos < len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 493 |  |  |         # Iterate through _dms_order, which specifies the possible substrings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 494 |  |  |         # for which codes exist in the Daitch-Mokotoff coding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 495 |  |  |         for sstr in _dms_order[word[pos]]:  # pragma: no branch | 
            
                                                                                                            
                            
            
                                    
            
            
                | 496 |  |  |             if word[pos:].startswith(sstr): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 497 |  |  |                 # Having determined a valid substring start, retrieve the code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 498 |  |  |                 dm_val = _dms_table[sstr] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 499 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 500 |  |  |                 # Having retrieved the code (triple), determine the correct | 
            
                                                                                                            
                            
            
                                    
            
            
                | 501 |  |  |                 # positional variant (first, pre-vocalic, elsewhere) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 502 |  |  |                 if pos == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 503 |  |  |                     dm_val = dm_val[0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 504 |  |  |                 elif (pos+len(sstr) < len(word) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 505 |  |  |                       word[pos+len(sstr)] in _vowels): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 506 |  |  |                     dm_val = dm_val[1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 507 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 508 |  |  |                     dm_val = dm_val[2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 509 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 510 |  |  |                 # Build the code strings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 511 |  |  |                 if isinstance(dm_val, tuple): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 512 |  |  |                     dms = [_ + text_type(dm_val[0]) for _ in dms] \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 513 |  |  |                             + [_ + text_type(dm_val[1]) for _ in dms] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 514 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 515 |  |  |                     dms = [_ + text_type(dm_val) for _ in dms] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 516 |  |  |                 pos += len(sstr) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 517 |  |  |                 break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 518 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 519 |  |  |     # Filter out double letters and _ placeholders | 
            
                                                                                                            
                            
            
                                    
            
            
                | 520 |  |  |     dms = (''.join(c for c in _delete_consecutive_repeats(_) if c != '_') | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 521 |  |  |            for _ in dms) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 522 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 523 |  |  |     # Trim codes and return set | 
            
                                                                                                            
                            
            
                                    
            
            
                | 524 |  |  |     if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 525 |  |  |         dms = ((_ + ('0'*maxlength))[:maxlength] for _ in dms) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 526 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 527 |  |  |         dms = (_[:maxlength] for _ in dms) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 528 |  |  |     return set(dms) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 529 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 530 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 531 |  |  | def koelner_phonetik(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 532 |  |  |     """Return the Kölner Phonetik (numeric output) code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 533 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 534 |  |  |     Based on the algorithm defined by :cite:`Postel:1969`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 535 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 536 |  |  |     While the output code is numeric, it is still a str because 0s can lead | 
            
                                                                                                            
                            
            
                                    
            
            
                | 537 |  |  |     the code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 538 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 539 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 540 |  |  |     :returns: the Kölner Phonetik value as a numeric string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 541 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 542 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 543 |  |  |     >>> koelner_phonetik('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 544 |  |  |     '478237' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 545 |  |  |     >>> koelner_phonetik('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 546 |  |  |     '65' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 547 |  |  |     >>> koelner_phonetik('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 548 |  |  |     '862' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 549 |  |  |     >>> koelner_phonetik('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 550 |  |  |     '862' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 551 |  |  |     >>> koelner_phonetik('Müller') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 552 |  |  |     '657' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 553 |  |  |     >>> koelner_phonetik('Zimmermann') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 554 |  |  |     '86766' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 555 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 556 |  |  |     # pylint: disable=too-many-branches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 557 |  |  |     def _after(word, i, letters): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 558 |  |  |         """Return True if word[i] follows one of the supplied letters.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 559 |  |  |         if i > 0 and word[i-1] in letters: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 560 |  |  |             return True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 561 |  |  |         return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 562 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 563 |  |  |     def _before(word, i, letters): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 564 |  |  |         """Return True if word[i] precedes one of the supplied letters.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 565 |  |  |         if i+1 < len(word) and word[i+1] in letters: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 566 |  |  |             return True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 567 |  |  |         return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 568 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 569 |  |  |     _vowels = {'A', 'E', 'I', 'J', 'O', 'U', 'Y'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 570 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 571 |  |  |     sdx = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 572 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 573 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 574 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 575 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 576 |  |  |     word = word.replace('Ä', 'AE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 577 |  |  |     word = word.replace('Ö', 'OE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 578 |  |  |     word = word.replace('Ü', 'UE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 579 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 580 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 581 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 582 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 583 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 584 |  |  |     # Nothing to convert, return base case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 585 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 586 |  |  |         return sdx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 587 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 588 |  |  |     for i in range(len(word)): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 589 |  | View Code Duplication |         if word[i] in _vowels: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 590 |  |  |             sdx += '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 591 |  |  |         elif word[i] == 'B': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 592 |  |  |             sdx += '1' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 593 |  |  |         elif word[i] == 'P': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 594 |  |  |             if _before(word, i, {'H'}): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 595 |  |  |                 sdx += '3' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 596 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 597 |  |  |                 sdx += '1' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 598 |  |  |         elif word[i] in {'D', 'T'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 599 |  |  |             if _before(word, i, {'C', 'S', 'Z'}): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 600 |  |  |                 sdx += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 601 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 602 |  |  |                 sdx += '2' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 603 |  |  |         elif word[i] in {'F', 'V', 'W'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 604 |  |  |             sdx += '3' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 605 |  |  |         elif word[i] in {'G', 'K', 'Q'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 606 |  |  |             sdx += '4' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 607 |  |  |         elif word[i] == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 608 |  |  |             if _after(word, i, {'S', 'Z'}): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 609 |  |  |                 sdx += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 610 |  |  |             elif i == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 611 |  |  |                 if _before(word, i, {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 612 |  |  |                                      'X'}): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 613 |  |  |                     sdx += '4' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 614 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 615 |  |  |                     sdx += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 616 |  |  |             elif _before(word, i, {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 617 |  |  |                 sdx += '4' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 618 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 619 |  |  |                 sdx += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 620 |  |  |         elif word[i] == 'X': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 621 |  |  |             if _after(word, i, {'C', 'K', 'Q'}): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 622 |  |  |                 sdx += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 623 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 624 |  |  |                 sdx += '48' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 625 |  |  |         elif word[i] == 'L': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 626 |  |  |             sdx += '5' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 627 |  |  |         elif word[i] in {'M', 'N'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 628 |  |  |             sdx += '6' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 629 |  |  |         elif word[i] == 'R': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 630 |  |  |             sdx += '7' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 631 |  |  |         elif word[i] in {'S', 'Z'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 632 |  |  |             sdx += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 633 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 634 |  |  |     sdx = _delete_consecutive_repeats(sdx) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 635 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 636 |  |  |     if sdx: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 637 |  |  |         sdx = sdx[:1] + sdx[1:].replace('0', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 638 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 639 |  |  |     return sdx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 640 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 641 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def koelner_phonetik_num_to_alpha(num):
                    """Map a numeric Kölner Phonetik code onto its alphabetic form.

                    The argument is first rendered as text. Every character other than
                    the digits 0-8 is discarded, and each remaining digit is replaced by
                    one letter (0=A, 1=P, 2=T, 3=F, 4=K, 5=L, 6=N, 7=R, 8=S).

                    :param str num: a numeric Kölner Phonetik representation (any value
                        whose text form holds the digits, such as an int, also works)
                    :returns: an alphabetic representation of the same code
                    :rtype: str

                    >>> koelner_phonetik_num_to_alpha(862)
                    'SNT'
                    >>> koelner_phonetik_num_to_alpha(657)
                    'NLR'
                    >>> koelner_phonetik_num_to_alpha(86766)
                    'SNRNN'
                    """
                    code_digits = '012345678'
                    code_letters = 'APTFKLNRS'

                    # translate() looks characters up by code point, hence the ord() keys.
                    digit_to_letter = {ord(digit): letter
                                       for digit, letter in zip(code_digits, code_letters)}

                    # Drop anything that is not a valid code digit before translating.
                    kept = [char for char in text_type(num) if char in code_digits]
                    return ''.join(kept).translate(digit_to_letter)
            
                                                                                                            
                            
            
                                    
            
            
                | 661 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 662 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def koelner_phonetik_alpha(word):
                    """Compute the Kölner Phonetik code of a word in alphabetic form.

                    The numeric code is obtained from koelner_phonetik and then passed
                    through koelner_phonetik_num_to_alpha to turn its digits into
                    letters.

                    :param str word: the word to transform
                    :returns: the Kölner Phonetik value as an alphabetic string
                    :rtype: str

                    >>> koelner_phonetik_alpha('Smith')
                    'SNT'
                    >>> koelner_phonetik_alpha('Schmidt')
                    'SNT'
                    >>> koelner_phonetik_alpha('Müller')
                    'NLR'
                    >>> koelner_phonetik_alpha('Zimmermann')
                    'SNRNN'
                    """
                    numeric_code = koelner_phonetik(word)
                    return koelner_phonetik_num_to_alpha(numeric_code)
            
                                                                                                            
                            
            
                                    
            
            
                | 680 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 681 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 682 |  |  | def nysiis(word, maxlength=6, modified=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 683 |  |  |     """Return the NYSIIS code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 684 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 685 |  |  |     The New York State Identification and Intelligence System algorithm is | 
            
                                                                                                            
                            
            
                                    
            
            
                | 686 |  |  |     defined in :cite:`Taft:1970`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 687 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 688 |  |  |     The modified version of this algorithm is described in Appendix B of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 689 |  |  |     :cite:`Lynch:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 690 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 691 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 692 |  |  |     :param int maxlength: the maximum length (default 6) of the code to return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 693 |  |  |     :param bool modified: indicates whether to use USDA modified NYSIIS | 
            
                                                                                                            
                            
            
                                    
            
            
                | 694 |  |  |     :returns: the NYSIIS value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 695 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 696 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 697 |  |  |     >>> nysiis('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 698 |  |  |     'CRASTA' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 699 |  |  |     >>> nysiis('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 700 |  |  |     'NAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 701 |  |  |     >>> nysiis('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 702 |  |  |     'SNAT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 703 |  |  |     >>> nysiis('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 704 |  |  |     'SNAD' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 705 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 706 |  |  |     >>> nysiis('Christopher', maxlength=_INFINITY) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 707 |  |  |     'CRASTAFAR' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 708 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 709 |  |  |     >>> nysiis('Christopher', maxlength=8, modified=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 710 |  |  |     'CRASTAFA' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 711 |  |  |     >>> nysiis('Niall', maxlength=8, modified=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 712 |  |  |     'NAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 713 |  |  |     >>> nysiis('Smith', maxlength=8, modified=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 714 |  |  |     'SNAT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 715 |  |  |     >>> nysiis('Schmidt', maxlength=8, modified=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 716 |  |  |     'SNAD' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 717 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 718 |  |  |     # Require a maxlength of at least 6 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 719 |  |  |     if maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 720 |  |  |         maxlength = max(6, maxlength) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 721 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 722 |  |  |     _vowels = {'A', 'E', 'I', 'O', 'U'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 723 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 724 |  |  |     word = ''.join(c for c in word.upper() if c.isalpha()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 725 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 726 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 727 |  |  |     # exit early if there are no alphas | 
            
                                                                                                            
                            
            
                                    
            
            
                | 728 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 729 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 730 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 731 |  |  |     if modified: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 732 |  |  |         original_first_char = word[0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 733 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 734 |  |  |     if word[:3] == 'MAC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 735 |  |  |         word = 'MCC'+word[3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 736 |  |  |     elif word[:2] == 'KN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 737 |  |  |         word = 'NN'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 738 |  |  |     elif word[:1] == 'K': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 739 |  |  |         word = 'C'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 740 |  |  |     elif word[:2] in {'PH', 'PF'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 741 |  |  |         word = 'FF'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 742 |  |  |     elif word[:3] == 'SCH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 743 |  |  |         word = 'SSS'+word[3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 744 |  |  |     elif modified: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 745 |  |  |         if word[:2] == 'WR': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 746 |  |  |             word = 'RR'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 747 |  |  |         elif word[:2] == 'RH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 748 |  |  |             word = 'RR'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 749 |  |  |         elif word[:2] == 'DG': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 750 |  |  |             word = 'GG'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 751 |  |  |         elif word[:1] in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 752 |  |  |             word = 'A'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 753 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 754 |  |  |     if modified and word[-1:] in {'S', 'Z'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 755 |  |  |         word = word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 756 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 757 |  |  |     if word[-2:] == 'EE' or word[-2:] == 'IE' or (modified and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 758 |  |  |                                                   word[-2:] == 'YE'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 759 |  |  |         word = word[:-2]+'Y' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 760 |  |  |     elif word[-2:] in {'DT', 'RT', 'RD'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 761 |  |  |         word = word[:-2]+'D' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 762 |  |  |     elif word[-2:] in {'NT', 'ND'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 763 |  |  |         word = word[:-2]+('N' if modified else 'D') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 764 |  |  |     elif modified: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 765 |  |  |         if word[-2:] == 'IX': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 766 |  |  |             word = word[:-2]+'ICK' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 767 |  |  |         elif word[-2:] == 'EX': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 768 |  |  |             word = word[:-2]+'ECK' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 769 |  |  |         elif word[-2:] in {'JR', 'SR'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 770 |  |  |             return 'ERROR'  # TODO: decide how best to return an error | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 771 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 772 |  |  |     key = word[:1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 773 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 774 |  |  |     skip = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 775 |  |  |     for i in range(1, len(word)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 776 |  |  |         if i >= len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 777 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 778 |  |  |         elif skip: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 779 |  |  |             skip -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 780 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 781 |  |  |         elif word[i:i+2] == 'EV': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 782 |  |  |             word = word[:i] + 'AF' + word[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 783 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 784 |  |  |         elif word[i] in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 785 |  |  |             word = word[:i] + 'A' + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 786 |  |  |         elif modified and i != len(word)-1 and word[i] == 'Y': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 787 |  |  |             word = word[:i] + 'A' + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 788 |  |  |         elif word[i] == 'Q': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 789 |  |  |             word = word[:i] + 'G' + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 790 |  |  |         elif word[i] == 'Z': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 791 |  |  |             word = word[:i] + 'S' + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 792 |  |  |         elif word[i] == 'M': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 793 |  |  |             word = word[:i] + 'N' + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 794 |  |  |         elif word[i:i+2] == 'KN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 795 |  |  |             word = word[:i] + 'N' + word[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 796 |  |  |         elif word[i] == 'K': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 797 |  |  |             word = word[:i] + 'C' + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 798 |  |  |         elif modified and i == len(word)-3 and word[i:i+3] == 'SCH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 799 |  |  |             word = word[:i] + 'SSA' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 800 |  |  |             skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 801 |  |  |         elif word[i:i+3] == 'SCH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 802 |  |  |             word = word[:i] + 'SSS' + word[i+3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 803 |  |  |             skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 804 |  |  |         elif modified and i == len(word)-2 and word[i:i+2] == 'SH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 805 |  |  |             word = word[:i] + 'SA' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 806 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 807 |  |  |         elif word[i:i+2] == 'SH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 808 |  |  |             word = word[:i] + 'SS' + word[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 809 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 810 |  |  |         elif word[i:i+2] == 'PH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 811 |  |  |             word = word[:i] + 'FF' + word[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 812 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 813 |  |  |         elif modified and word[i:i+3] == 'GHT': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 814 |  |  |             word = word[:i] + 'TTT' + word[i+3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 815 |  |  |             skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 816 |  |  |         elif modified and word[i:i+2] == 'DG': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 817 |  |  |             word = word[:i] + 'GG' + word[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 818 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 819 |  |  |         elif modified and word[i:i+2] == 'WR': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 820 |  |  |             word = word[:i] + 'RR' + word[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 821 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 822 |  |  |         elif word[i] == 'H' and (word[i-1] not in _vowels or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 823 |  |  |                                  word[i+1:i+2] not in _vowels): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 824 |  |  |             word = word[:i] + word[i-1] + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 825 |  |  |         elif word[i] == 'W' and word[i-1] in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 826 |  |  |             word = word[:i] + word[i-1] + word[i+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 827 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 828 |  |  |         if word[i:i+skip+1] != key[-1:]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 829 |  |  |             key += word[i:i+skip+1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 830 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 831 |  |  |     key = _delete_consecutive_repeats(key) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 832 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 833 |  |  |     if key[-1:] == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 834 |  |  |         key = key[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 835 |  |  |     if key[-2:] == 'AY': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 836 |  |  |         key = key[:-2] + 'Y' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 837 |  |  |     if key[-1:] == 'A': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 838 |  |  |         key = key[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 839 |  |  |     if modified and key[:1] == 'A': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 840 |  |  |         key = original_first_char + key[1:] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 841 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 842 |  |  |     if maxlength and maxlength < _INFINITY: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 843 |  |  |         key = key[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 844 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 845 |  |  |     return key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 846 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 847 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 848 |  |  | def mra(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 849 |  |  |     """Return the MRA personal numeric identifier (PNI) for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 850 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 851 |  |  |     A description of the Western Airlines Surname Match Rating Algorithm can | 
            
                                                                                                            
                            
            
                                    
            
            
                | 852 |  |  |     be found on page 18 of :cite:`Moore:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 853 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 854 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 855 |  |  |     :returns: the MRA PNI | 
            
                                                                                                            
                            
            
                                    
            
            
                | 856 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 857 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 858 |  |  |     >>> mra('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 859 |  |  |     'CHRPHR' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 860 |  |  |     >>> mra('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 861 |  |  |     'NL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 862 |  |  |     >>> mra('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 863 |  |  |     'SMTH' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 864 |  |  |     >>> mra('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 865 |  |  |     'SCHMDT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 866 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 867 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 868 |  |  |         return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 869 |  |  |     word = word.upper() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 870 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 871 |  |  |     word = word[0]+''.join(c for c in word[1:] if | 
            
                                                                                                            
                            
            
                                    
            
            
                | 872 |  |  |                            c not in {'A', 'E', 'I', 'O', 'U'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 873 |  |  |     word = _delete_consecutive_repeats(word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 874 |  |  |     if len(word) > 6: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 875 |  |  |         word = word[:3]+word[-3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 876 |  |  |     return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 877 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 878 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 879 |  |  | def metaphone(word, maxlength=_INFINITY): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 880 |  |  |     """Return the Metaphone code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 881 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 882 |  |  |     Based on Lawrence Philips' Pick BASIC code from 1990 :cite:`Philips:1990`, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 883 |  |  |     as described in :cite:`Philips:1990b`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 884 |  |  |     This incorporates some corrections to the above code, particularly | 
            
                                                                                                            
                            
            
                                    
            
            
                | 885 |  |  |     some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 886 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 887 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 888 |  |  |     :param int maxlength: the maximum length of the returned Metaphone code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 889 |  |  |         (defaults to unlimited, but in Philips' original implementation | 
            
                                                                                                            
                            
            
                                    
            
            
                | 890 |  |  |         this was 4) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 891 |  |  |     :returns: the Metaphone value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 892 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 893 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 894 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 895 |  |  |     >>> metaphone('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 896 |  |  |     'KRSTFR' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 897 |  |  |     >>> metaphone('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 898 |  |  |     'NL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 899 |  |  |     >>> metaphone('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 900 |  |  |     'SM0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 901 |  |  |     >>> metaphone('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 902 |  |  |     'SKMTT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 903 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 904 |  |  |     # pylint: disable=too-many-branches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 905 |  |  |     _vowels = {'A', 'E', 'I', 'O', 'U'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 906 |  |  |     _frontv = {'E', 'I', 'Y'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 907 |  |  |     _varson = {'C', 'G', 'P', 'S', 'T'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 908 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 909 |  |  |     # Require a maxlength of at least 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 910 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 911 |  |  |         maxlength = max(4, maxlength) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 912 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 913 |  |  |         maxlength = 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 914 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 915 |  |  |     # Make all caps and keep only alphanumeric characters (_varson above holds the variable sounds--those modified by adding an "h") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 916 |  |  |     ename = ''.join(c for c in word.upper() if c.isalnum()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 917 |  |  |     ename = ename.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 918 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 919 |  |  |     # Return early on empty input, then simplify special word-initial letters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 920 |  |  |     if not ename: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 921 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 922 |  |  |     if ename[0:2] in {'PN', 'AE', 'KN', 'GN', 'WR'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 923 |  |  |         ename = ename[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 924 |  |  |     elif ename[0] == 'X': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 925 |  |  |         ename = 'S' + ename[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 926 |  |  |     elif ename[0:2] == 'WH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 927 |  |  |         ename = 'W' + ename[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 928 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 929 |  |  |     # Convert to metaph | 
            
                                                                                                            
                            
            
                                    
            
            
                | 930 |  |  |     elen = len(ename)-1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 931 |  |  |     metaph = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 932 |  |  |     for i in range(len(ename)): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 933 |  |  |         if len(metaph) >= maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 934 |  |  |             break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 935 |  |  |         if ((ename[i] not in {'G', 'T'} and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 936 |  |  |              i > 0 and ename[i-1] == ename[i])): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 937 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 938 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 939 |  |  |         if ename[i] in _vowels and i == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 940 |  |  |             metaph = ename[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 941 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 942 |  |  |         elif ename[i] == 'B': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 943 |  |  |             if i != elen or ename[i-1] != 'M': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 944 |  |  |                 metaph += ename[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 945 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 946 |  |  |         elif ename[i] == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 947 |  |  |             if not (i > 0 and ename[i-1] == 'S' and ename[i+1:i+2] in _frontv): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 948 |  |  |                 if ename[i+1:i+3] == 'IA': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 949 |  |  |                     metaph += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 950 |  |  |                 elif ename[i+1:i+2] in _frontv: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 951 |  |  |                     metaph += 'S' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 952 |  |  |                 elif i > 0 and ename[i-1:i+2] == 'SCH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 953 |  |  |                     metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 954 |  |  |                 elif ename[i+1:i+2] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 955 |  |  |                     if i == 0 and i+1 < elen and ename[i+2:i+3] not in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 956 |  |  |                         metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 957 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 958 |  |  |                         metaph += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 959 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 960 |  |  |                     metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 961 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 962 |  |  |         elif ename[i] == 'D': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 963 |  |  |             if ename[i+1:i+2] == 'G' and ename[i+2:i+3] in _frontv: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 964 |  |  |                 metaph += 'J' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 965 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 966 |  |  |                 metaph += 'T' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 967 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 968 |  |  |         elif ename[i] == 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 969 |  |  |             if ename[i+1:i+2] == 'H' and not (i+1 == elen or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 970 |  |  |                                               ename[i+2:i+3] not in _vowels): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 971 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 972 |  |  |             elif i > 0 and ((i+1 == elen and ename[i+1] == 'N') or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 973 |  |  |                             (i+3 == elen and ename[i+1:i+4] == 'NED')): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 974 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 975 |  |  |             elif (i-1 > 0 and i+1 <= elen and ename[i-1] == 'D' and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 976 |  |  |                   ename[i+1] in _frontv): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 977 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 978 |  |  |             elif ename[i+1:i+2] == 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 979 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 980 |  |  |             elif ename[i+1:i+2] in _frontv: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 981 |  |  |                 if i == 0 or ename[i-1] != 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 982 |  |  |                     metaph += 'J' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 983 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 984 |  |  |                     metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 985 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 986 |  |  |                 metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 987 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 988 |  |  |         elif ename[i] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 989 |  |  |             if ((i > 0 and ename[i-1] in _vowels and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 990 |  |  |                  ename[i+1:i+2] not in _vowels)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 991 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 992 |  |  |             elif i > 0 and ename[i-1] in _varson: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 993 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 994 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 995 |  |  |                 metaph += 'H' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 996 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 997 |  |  |         elif ename[i] in {'F', 'J', 'L', 'M', 'N', 'R'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 998 |  |  |             metaph += ename[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 999 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1000 |  |  |         elif ename[i] == 'K': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1001 |  |  |             if i > 0 and ename[i-1] == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1002 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1003 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1004 |  |  |                 metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1005 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1006 |  |  |         elif ename[i] == 'P': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1007 |  |  |             if ename[i+1:i+2] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1008 |  |  |                 metaph += 'F' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1009 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1010 |  |  |                 metaph += 'P' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1011 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1012 |  |  |         elif ename[i] == 'Q': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1013 |  |  |             metaph += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1014 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1015 |  |  |         elif ename[i] == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1016 |  |  |             if ((i > 0 and i+2 <= elen and ename[i+1] == 'I' and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1017 |  |  |                  ename[i+2] in 'OA')): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1018 |  |  |                 metaph += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1019 |  |  |             elif ename[i+1:i+2] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1020 |  |  |                 metaph += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1021 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1022 |  |  |                 metaph += 'S' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1023 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1024 |  |  |         elif ename[i] == 'T': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1025 |  |  |             if ((i > 0 and i+2 <= elen and ename[i+1] == 'I' and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1026 |  |  |                  ename[i+2] in {'A', 'O'})): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1027 |  |  |                 metaph += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1028 |  |  |             elif ename[i+1:i+2] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1029 |  |  |                 metaph += '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1030 |  |  |             elif ename[i+1:i+3] != 'CH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1031 |  |  |                 if ename[i-1:i] != 'T': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1032 |  |  |                     metaph += 'T' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1033 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1034 |  |  |         elif ename[i] == 'V': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1035 |  |  |             metaph += 'F' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1036 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1037 |  |  |         elif ename[i] in 'WY': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1038 |  |  |             if ename[i+1:i+2] in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1039 |  |  |                 metaph += ename[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1040 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1041 |  |  |         elif ename[i] == 'X': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1042 |  |  |             metaph += 'KS' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1043 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1044 |  |  |         elif ename[i] == 'Z': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1045 |  |  |             metaph += 'S' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1046 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1047 |  |  |     return metaph | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1048 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 1049 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1050 |  |  | def double_metaphone(word, maxlength=_INFINITY): | 
            
                                                                        
                            
            
                                    
            
            
                | 1051 |  |  |     """Return the Double Metaphone code for a word. | 
            
                                                                        
                            
            
                                    
            
            
                | 1052 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1053 |  |  |     Based on Lawrence Philips' (Visual) C++ code from 1999 | 
            
                                                                        
                            
            
                                    
            
            
                | 1054 |  |  |     :cite:`Philips:2000`. | 
            
                                                                        
                            
            
                                    
            
            
                | 1055 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1056 |  |  |     :param word: the word to transform | 
            
                                                                        
                            
            
                                    
            
            
                | 1057 |  |  |     :param maxlength: the maximum length of the returned Double Metaphone codes | 
            
                                                                        
                            
            
                                    
            
            
                | 1058 |  |  |         (defaults to unlimited, but in Philips' original implementation this | 
            
                                                                        
                            
            
                                    
            
            
                | 1059 |  |  |         was 4) | 
            
                                                                        
                            
            
                                    
            
            
                | 1060 |  |  |     :returns: the Double Metaphone value(s) | 
            
                                                                        
                            
            
                                    
            
            
                | 1061 |  |  |     :rtype: tuple | 
            
                                                                        
                            
            
                                    
            
            
                | 1062 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1063 |  |  |     >>> double_metaphone('Christopher') | 
            
                                                                        
                            
            
                                    
            
            
                | 1064 |  |  |     ('KRSTFR', '') | 
            
                                                                        
                            
            
                                    
            
            
                | 1065 |  |  |     >>> double_metaphone('Niall') | 
            
                                                                        
                            
            
                                    
            
            
                | 1066 |  |  |     ('NL', '') | 
            
                                                                        
                            
            
                                    
            
            
                | 1067 |  |  |     >>> double_metaphone('Smith') | 
            
                                                                        
                            
            
                                    
            
            
                | 1068 |  |  |     ('SM0', 'XMT') | 
            
                                                                        
                            
            
                                    
            
            
                | 1069 |  |  |     >>> double_metaphone('Schmidt') | 
            
                                                                        
                            
            
                                    
            
            
                | 1070 |  |  |     ('XMT', 'SMT') | 
            
                                                                        
                            
            
                                    
            
            
                | 1071 |  |  |     """ | 
            
                                                                        
                            
            
                                    
            
            
                | 1072 |  |  |     # pylint: disable=too-many-branches | 
            
                                                                        
                            
            
                                    
            
            
                | 1073 |  |  |     # Require a maxlength of at least 4 | 
            
                                                                        
                            
            
                                    
            
            
                | 1074 |  |  |     if maxlength is not None: | 
            
                                                                        
                            
            
                                    
            
            
                | 1075 |  |  |         maxlength = max(4, maxlength) | 
            
                                                                        
                            
            
                                    
            
            
                | 1076 |  |  |     else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1077 |  |  |         maxlength = 64 | 
            
                                                                        
                            
            
                                    
            
            
                | 1078 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1079 |  |  |     primary = '' | 
            
                                                                        
                            
            
                                    
            
            
                | 1080 |  |  |     secondary = '' | 
            
                                                                        
                            
            
                                    
            
            
                | 1081 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def _slavo_germanic():
                    """Tell whether the word carries a Slavic/Germanic marker.

                    The enclosing function's ``word`` is inspected for any of
                    the substrings 'W', 'K' or 'CZ'.

                    :returns: True if at least one marker occurs in the word
                    :rtype: bool
                    """
                    markers = ('W', 'K', 'CZ')
                    return any(marker in word for marker in markers)
            
                                                                        
                            
            
                                    
            
            
                | 1087 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def _metaph_add(pri, sec=''):
                    """Build the extended (primary, secondary) encoding pair.

                    ``pri`` is appended to the current primary code.  The
                    secondary code receives ``sec`` when one is given, and
                    falls back to ``pri`` when ``sec`` is empty.  A ``sec`` of
                    a single space leaves the secondary code unchanged.
                    """
                    pri_code = primary + pri if pri else primary
                    if not sec:
                        # No distinct secondary element: mirror the primary one.
                        sec_code = secondary + pri
                    elif sec != ' ':
                        sec_code = secondary + sec
                    else:
                        # A lone space means "add nothing to the secondary".
                        sec_code = secondary
                    return (pri_code, sec_code)
            
                                                                        
                            
            
                                    
            
            
                | 1100 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def _is_vowel(pos):
                    """Report whether word[pos] is one of A, E, I, O, U, or Y.

                    A negative ``pos`` is never treated as a vowel position.
                    """
                    if pos < 0:
                        return False
                    return word[pos] in 'AEIOUY'
            
                                                                        
                            
            
                                    
            
            
                | 1106 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def _get_at(pos):
                    """Fetch the single character of word found at index pos."""
                    char = word[pos]
                    return char
            
                                                                        
                            
            
                                    
            
            
                | 1110 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def _string_at(pos, slen, substrings):
                    """Report whether the slice word[pos:pos+slen] is in substrings.

                    A negative ``pos`` never matches.
                    """
                    return pos >= 0 and word[pos:pos + slen] in substrings
            
                                                                        
                            
            
                                    
            
            
                | 1116 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1117 |  |  |     current = 0 | 
            
                                                                        
                            
            
                                    
            
            
                | 1118 |  |  |     length = len(word) | 
            
                                                                        
                            
            
                                    
            
            
                | 1119 |  |  |     if length < 1: | 
            
                                                                        
                            
            
                                    
            
            
                | 1120 |  |  |         return ('', '') | 
            
                                                                        
                            
            
                                    
            
            
                | 1121 |  |  |     last = length - 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1122 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1123 |  |  |     word = word.upper() | 
            
                                                                        
                            
            
                                    
            
            
                | 1124 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                        
                            
            
                                    
            
            
                | 1125 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1126 |  |  |     # Pad the original string so that we can index beyond the edge of the world | 
            
                                                                        
                            
            
                                    
            
            
                | 1127 |  |  |     word += '     ' | 
            
                                                                        
                            
            
                                    
            
            
                | 1128 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1129 |  |  |     # Skip these when at start of word | 
            
                                                                        
                            
            
                                    
            
            
                | 1130 |  |  |     if word[0:2] in {'GN', 'KN', 'PN', 'WR', 'PS'}: | 
            
                                                                        
                            
            
                                    
            
            
                | 1131 |  |  |         current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1132 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1133 |  |  |     # Initial 'X' is pronounced 'Z' e.g. 'Xavier' | 
            
                                                                        
                            
            
                                    
            
            
                | 1134 |  |  |     if _get_at(0) == 'X': | 
            
                                                                        
                            
            
                                    
            
            
                | 1135 |  |  |         (primary, secondary) = _metaph_add('S')  # 'Z' maps to 'S' | 
            
                                                                        
                            
            
                                    
            
            
                | 1136 |  |  |         current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1137 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1138 |  |  |     # Main loop | 
            
                                                                        
                            
            
                                    
            
            
                | 1139 |  |  |     while True: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 1140 |  |  |         if current >= length: | 
            
                                                                        
                            
            
                                    
            
            
                | 1141 |  |  |             break | 
            
                                                                        
                            
            
                                    
            
            
                | 1142 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1143 |  |  |         if _get_at(current) in {'A', 'E', 'I', 'O', 'U', 'Y'}: | 
            
                                                                        
                            
            
                                    
            
            
                | 1144 |  |  |             if current == 0: | 
            
                                                                        
                            
            
                                    
            
            
                | 1145 |  |  |                 # All init vowels now map to 'A' | 
            
                                                                        
                            
            
                                    
            
            
                | 1146 |  |  |                 (primary, secondary) = _metaph_add('A') | 
            
                                                                        
                            
            
                                    
            
            
                | 1147 |  |  |             current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1148 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1149 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1150 |  |  |         elif _get_at(current) == 'B': | 
            
                                                                        
                            
            
                                    
            
            
                | 1151 |  |  |             # "-mb", e.g., "dumb", already skipped over... | 
            
                                                                        
                            
            
                                    
            
            
                | 1152 |  |  |             (primary, secondary) = _metaph_add('P') | 
            
                                                                        
                            
            
                                    
            
            
                | 1153 |  |  |             if _get_at(current + 1) == 'B': | 
            
                                                                        
                            
            
                                    
            
            
                | 1154 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1155 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1156 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1157 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1158 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1159 |  |  |         elif _get_at(current) == 'Ç': | 
            
                                                                        
                            
            
                                    
            
            
                | 1160 |  |  |             (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1161 |  |  |             current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1162 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1163 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1164 |  |  |         elif _get_at(current) == 'C': | 
            
                                                                        
                            
            
                                    
            
            
                | 1165 |  |  |             # Various Germanic | 
            
                                                                        
                            
            
                                    
            
            
                | 1166 |  |  |             if (current > 1 and not _is_vowel(current - 2) and | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 1167 |  |  |                     _string_at((current - 1), 3, {'ACH'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1168 |  |  |                     ((_get_at(current + 2) != 'I') and | 
            
                                                                        
                            
            
                                    
            
            
                | 1169 |  |  |                      ((_get_at(current + 2) != 'E') or | 
            
                                                                        
                            
            
                                    
            
            
                | 1170 |  |  |                       _string_at((current - 2), 6, | 
            
                                                                        
                            
            
                                    
            
            
                | 1171 |  |  |                                  {'BACHER', 'MACHER'})))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1172 |  |  |                 (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1173 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1174 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1175 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1176 |  |  |             # Special case 'caesar' | 
            
                                                                        
                            
            
                                    
            
            
                | 1177 |  |  |             elif current == 0 and _string_at(current, 6, {'CAESAR'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1178 |  |  |                 (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1179 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1180 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1181 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1182 |  |  |             # Italian 'chianti' | 
            
                                                                        
                            
            
                                    
            
            
                | 1183 |  |  |             elif _string_at(current, 4, {'CHIA'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1184 |  |  |                 (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1185 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1186 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1187 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1188 |  |  |             elif _string_at(current, 2, {'CH'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1189 |  |  |                 # Find 'Michael' | 
            
                                                                        
                            
            
                                    
            
            
                | 1190 |  |  |                 if current > 0 and _string_at(current, 4, {'CHAE'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1191 |  |  |                     (primary, secondary) = _metaph_add('K', 'X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1192 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1193 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1194 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1195 |  |  |                 # Greek roots e.g. 'chemistry', 'chorus' | 
            
                                                                        
                            
            
                                    
            
            
                | 1196 |  |  |                 elif (current == 0 and | 
            
                                                                        
                            
            
                                    
            
            
                | 1197 |  |  |                       (_string_at((current + 1), 5, | 
            
                                                                        
                            
            
                                    
            
            
                | 1198 |  |  |                                   {'HARAC', 'HARIS'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1199 |  |  |                        _string_at((current + 1), 3, | 
            
                                                                        
                            
            
                                    
            
            
                | 1200 |  |  |                                   {'HOR', 'HYM', 'HIA', 'HEM'})) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1201 |  |  |                       not _string_at(0, 5, {'CHORE'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1202 |  |  |                     (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1203 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1204 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1205 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1206 |  |  |                 # Germanic, Greek, or otherwise 'ch' for 'kh' sound | 
            
                                                                        
                            
            
                                    
            
            
                | 1207 |  |  |                 elif ((_string_at(0, 4, {'VAN ', 'VON '}) or | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 1208 |  |  |                        _string_at(0, 3, {'SCH'})) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1209 |  |  |                       # 'architect' but not 'arch', 'orchestra', 'orchid' | 
            
                                                                        
                            
            
                                    
            
            
                | 1210 |  |  |                       _string_at((current - 2), 6, | 
            
                                                                        
                            
            
                                    
            
            
                | 1211 |  |  |                                  {'ORCHES', 'ARCHIT', 'ORCHID'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1212 |  |  |                       _string_at((current + 2), 1, {'T', 'S'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1213 |  |  |                       ((_string_at((current - 1), 1, | 
            
                                                                        
                            
            
                                    
            
            
                | 1214 |  |  |                                    {'A', 'O', 'U', 'E'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1215 |  |  |                         (current == 0)) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1216 |  |  |                        # e.g., 'wachtler', 'wechsler', but not 'tichner' | 
            
                                                                        
                            
            
                                    
            
            
                | 1217 |  |  |                        _string_at((current + 2), 1, | 
            
                                                                        
                            
            
                                    
            
            
                | 1218 |  |  |                                   {'L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', | 
            
                                                                        
                            
            
                                    
            
            
                | 1219 |  |  |                                    ' '}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1220 |  |  |                     (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1221 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1222 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1223 |  |  |                     if current > 0: | 
            
                                                                        
                            
            
                                    
            
            
                | 1224 |  |  |                         if _string_at(0, 2, {'MC'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1225 |  |  |                             # e.g., "McHugh" | 
            
                                                                        
                            
            
                                    
            
            
                | 1226 |  |  |                             (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1227 |  |  |                         else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1228 |  |  |                             (primary, secondary) = _metaph_add('X', 'K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1229 |  |  |                     else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1230 |  |  |                         (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1231 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1232 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1233 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1234 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1235 |  |  |             # e.g., 'czerny' | 
            
                                                                        
                            
            
                                    
            
            
                | 1236 |  |  |             elif (_string_at(current, 2, {'CZ'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1237 |  |  |                   not _string_at((current - 2), 4, {'WICZ'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1238 |  |  |                 (primary, secondary) = _metaph_add('S', 'X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1239 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1240 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1241 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1242 |  |  |             # e.g., 'focaccia' | 
            
                                                                        
                            
            
                                    
            
            
                | 1243 |  |  |             elif _string_at((current + 1), 3, {'CIA'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1244 |  |  |                 (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1245 |  |  |                 current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1246 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1247 |  |  |             # double 'C', but not if e.g. 'McClellan' | 
            
                                                                        
                            
            
                                    
            
            
                | 1248 |  |  |             elif (_string_at(current, 2, {'CC'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1249 |  |  |                   not ((current == 1) and (_get_at(0) == 'M'))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1250 |  |  |                 # 'bellocchio' but not 'bacchus' | 
            
                                                                        
                            
            
                                    
            
            
                | 1251 |  |  |                 if ((_string_at((current + 2), 1, | 
            
                                                                        
                            
            
                                    
            
            
                | 1252 |  |  |                                 {'I', 'E', 'H'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1253 |  |  |                      not _string_at((current + 2), 2, ['HU']))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1254 |  |  |                     # 'accident', 'accede', 'succeed' | 
            
                                                                        
                            
            
                                    
            
            
                | 1255 |  |  |                     if ((((current == 1) and _get_at(current - 1) == 'A') or | 
            
                                                                        
                            
            
                                    
            
            
                | 1256 |  |  |                          _string_at((current - 1), 5, | 
            
                                                                        
                            
            
                                    
            
            
                | 1257 |  |  |                                     {'UCCEE', 'UCCES'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1258 |  |  |                         (primary, secondary) = _metaph_add('KS') | 
            
                                                                        
                            
            
                                    
            
            
                | 1259 |  |  |                     # 'bacci', 'bertucci', other italian | 
            
                                                                        
                            
            
                                    
            
            
                | 1260 |  |  |                     else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1261 |  |  |                         (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1262 |  |  |                     current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1263 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1264 |  |  |                 else:  # Pierce's rule | 
            
                                                                        
                            
            
                                    
            
            
                | 1265 |  |  |                     (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1266 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1267 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1268 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1269 |  |  |             elif _string_at(current, 2, {'CK', 'CG', 'CQ'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1270 |  |  |                 (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1271 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1272 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1273 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1274 |  |  |             elif _string_at(current, 2, {'CI', 'CE', 'CY'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1275 |  |  |                 # Italian vs. English | 
            
                                                                        
                            
            
                                    
            
            
                | 1276 |  |  |                 if _string_at(current, 3, {'CIO', 'CIE', 'CIA'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1277 |  |  |                     (primary, secondary) = _metaph_add('S', 'X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1278 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1279 |  |  |                     (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1280 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1281 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1282 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1283 |  |  |             # else | 
            
                                                                        
                            
            
                                    
            
            
                | 1284 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1285 |  |  |                 (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1286 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1287 |  |  |                 # name sent in as e.g. 'mac caffrey', 'mac gregor' | 
            
                                                                        
                            
            
                                    
            
            
                | 1288 |  |  |                 if _string_at((current + 1), 2, {' C', ' Q', ' G'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1289 |  |  |                     current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1290 |  |  |                 elif (_string_at((current + 1), 1, | 
            
                                                                        
                            
            
                                    
            
            
                | 1291 |  |  |                                  {'C', 'K', 'Q'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1292 |  |  |                       not _string_at((current + 1), 2, {'CE', 'CI'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1293 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1294 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1295 |  |  |                     current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1296 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1297 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1298 |  |  |         elif _get_at(current) == 'D': | 
            
                                                                        
                            
            
                                    
            
            
                | 1299 |  |  |             if _string_at(current, 2, {'DG'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1300 |  |  |                 if _string_at((current + 2), 1, {'I', 'E', 'Y'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1301 |  |  |                     # e.g. 'edge' | 
            
                                                                        
                            
            
                                    
            
            
                | 1302 |  |  |                     (primary, secondary) = _metaph_add('J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1303 |  |  |                     current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1304 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1305 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1306 |  |  |                     # e.g. 'edgar' | 
            
                                                                        
                            
            
                                    
            
            
                | 1307 |  |  |                     (primary, secondary) = _metaph_add('TK') | 
            
                                                                        
                            
            
                                    
            
            
                | 1308 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1309 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1310 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1311 |  |  |             elif _string_at(current, 2, {'DT', 'DD'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1312 |  |  |                 (primary, secondary) = _metaph_add('T') | 
            
                                                                        
                            
            
                                    
            
            
                | 1313 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1314 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1315 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1316 |  |  |             # else | 
            
                                                                        
                            
            
                                    
            
            
                | 1317 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1318 |  |  |                 (primary, secondary) = _metaph_add('T') | 
            
                                                                        
                            
            
                                    
            
            
                | 1319 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1320 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1321 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1322 |  |  |         elif _get_at(current) == 'F': | 
            
                                                                        
                            
            
                                    
            
            
                | 1323 |  |  |             if _get_at(current + 1) == 'F': | 
            
                                                                        
                            
            
                                    
            
            
                | 1324 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1325 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1326 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1327 |  |  |             (primary, secondary) = _metaph_add('F') | 
            
                                                                        
                            
            
                                    
            
            
                | 1328 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1329 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1330 |  |  |         elif _get_at(current) == 'G': | 
            
                                                                        
                            
            
                                    
            
            
                | 1331 |  |  |             if _get_at(current + 1) == 'H': | 
            
                                                                        
                            
            
                                    
            
            
                | 1332 |  |  |                 if (current > 0) and not _is_vowel(current - 1): | 
            
                                                                        
                            
            
                                    
            
            
                | 1333 |  |  |                     (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1334 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1335 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1336 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1337 |  |  |                 # 'ghislane', 'ghiradelli' | 
            
                                                                        
                            
            
                                    
            
            
                | 1338 |  |  |                 elif current == 0: | 
            
                                                                        
                            
            
                                    
            
            
                | 1339 |  |  |                     if _get_at(current + 2) == 'I': | 
            
                                                                        
                            
            
                                    
            
            
                | 1340 |  |  |                         (primary, secondary) = _metaph_add('J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1341 |  |  |                     else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1342 |  |  |                         (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1343 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1344 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1345 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1346 |  |  |                 # Parker's rule (with some further refinements) - e.g., 'hugh' | 
            
                                                                        
                            
            
                                    
            
            
                | 1347 |  |  |                 elif (((current > 1) and | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 1348 |  |  |                        _string_at((current - 2), 1, {'B', 'H', 'D'})) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1349 |  |  |                       # e.g., 'bough' | 
            
                                                                        
                            
            
                                    
            
            
                | 1350 |  |  |                       ((current > 2) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1351 |  |  |                        _string_at((current - 3), 1, {'B', 'H', 'D'})) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1352 |  |  |                       # e.g., 'broughton' | 
            
                                                                        
                            
            
                                    
            
            
                | 1353 |  |  |                       ((current > 3) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1354 |  |  |                        _string_at((current - 4), 1, {'B', 'H'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1355 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1356 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1357 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1358 |  |  |                     # e.g. 'laugh', 'McLaughlin', 'cough', | 
            
                                                                        
                            
            
                                    
            
            
                | 1359 |  |  |                     #      'gough', 'rough', 'tough' | 
            
                                                                        
                            
            
                                    
            
            
                | 1360 |  |  |                     if ((current > 2) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1361 |  |  |                             (_get_at(current - 1) == 'U') and | 
            
                                                                        
                            
            
                                    
            
            
                | 1362 |  |  |                             (_string_at((current - 3), 1, | 
            
                                                                        
                            
            
                                    
            
            
                | 1363 |  |  |                                         {'C', 'G', 'L', 'R', 'T'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1364 |  |  |                         (primary, secondary) = _metaph_add('F') | 
            
                                                                        
                            
            
                                    
            
            
                | 1365 |  |  |                     elif (current > 0) and _get_at(current - 1) != 'I': | 
            
                                                                        
                            
            
                                    
            
            
                | 1366 |  |  |                         (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1367 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1368 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1369 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1370 |  |  |             elif _get_at(current + 1) == 'N': | 
            
                                                                        
                            
            
                                    
            
            
                | 1371 |  |  |                 if (current == 1) and _is_vowel(0) and not _slavo_germanic(): | 
            
                                                                        
                            
            
                                    
            
            
                | 1372 |  |  |                     (primary, secondary) = _metaph_add('KN', 'N') | 
            
                                                                        
                            
            
                                    
            
            
                | 1373 |  |  |                 # not e.g. 'cagney' | 
            
                                                                        
                            
            
                                    
            
            
                | 1374 |  |  |                 elif (not _string_at((current + 2), 2, {'EY'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1375 |  |  |                       (_get_at(current + 1) != 'Y') and | 
            
                                                                        
                            
            
                                    
            
            
                | 1376 |  |  |                       not _slavo_germanic()): | 
            
                                                                        
                            
            
                                    
            
            
                | 1377 |  |  |                     (primary, secondary) = _metaph_add('N', 'KN') | 
            
                                                                        
                            
            
                                    
            
            
                | 1378 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1379 |  |  |                     (primary, secondary) = _metaph_add('KN') | 
            
                                                                        
                            
            
                                    
            
            
                | 1380 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1381 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1382 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1383 |  |  |             # 'tagliaro' | 
            
                                                                        
                            
            
                                    
            
            
                | 1384 |  |  |             elif (_string_at((current + 1), 2, {'LI'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1385 |  |  |                   not _slavo_germanic()): | 
            
                                                                        
                            
            
                                    
            
            
                | 1386 |  |  |                 (primary, secondary) = _metaph_add('KL', 'L') | 
            
                                                                        
                            
            
                                    
            
            
                | 1387 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1388 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1389 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1390 |  |  |             # -ges-, -gep-, -gel-, -gie- at beginning | 
            
                                                                        
                            
            
                                    
            
            
                | 1391 |  |  |             elif ((current == 0) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1392 |  |  |                   ((_get_at(current + 1) == 'Y') or | 
            
                                                                        
                            
            
                                    
            
            
                | 1393 |  |  |                    _string_at((current + 1), 2, {'ES', 'EP', 'EB', 'EL', 'EY', | 
            
                                                                        
                            
            
                                    
            
            
                | 1394 |  |  |                                                  'IB', 'IL', 'IN', 'IE', 'EI', | 
            
                                                                        
                            
            
                                    
            
            
                | 1395 |  |  |                                                  'ER'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1396 |  |  |                 (primary, secondary) = _metaph_add('K', 'J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1397 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1398 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1399 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1400 |  |  |             #  -ger-,  -gy- | 
            
                                                                        
                            
            
                                    
            
            
                | 1401 |  |  |             elif ((_string_at((current + 1), 2, {'ER'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1402 |  |  |                    (_get_at(current + 1) == 'Y')) and not | 
            
                                                                        
                            
            
                                    
            
            
                | 1403 |  |  |                   _string_at(0, 6, {'DANGER', 'RANGER', 'MANGER'}) and not | 
            
                                                                        
                            
            
                                    
            
            
                | 1404 |  |  |                   _string_at((current - 1), 1, {'E', 'I'}) and not | 
            
                                                                        
                            
            
                                    
            
            
                | 1405 |  |  |                   _string_at((current - 1), 3, {'RGY', 'OGY'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1406 |  |  |                 (primary, secondary) = _metaph_add('K', 'J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1407 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1408 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1409 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1410 |  |  |             #  Italian, e.g., 'biaggi' | 
            
                                                                        
                            
            
                                    
            
            
                | 1411 |  |  |             elif (_string_at((current + 1), 1, {'E', 'I', 'Y'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1412 |  |  |                   _string_at((current - 1), 4, {'AGGI', 'OGGI'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1413 |  |  |                 # obvious germanic | 
            
                                                                        
                            
            
                                    
            
            
                | 1414 |  |  |                 if (((_string_at(0, 4, {'VAN ', 'VON '}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1415 |  |  |                       _string_at(0, 3, {'SCH'})) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1416 |  |  |                      _string_at((current + 1), 2, {'ET'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1417 |  |  |                     (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1418 |  |  |                 elif _string_at((current + 1), 4, {'IER '}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1419 |  |  |                     (primary, secondary) = _metaph_add('J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1420 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1421 |  |  |                     (primary, secondary) = _metaph_add('J', 'K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1422 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1423 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1424 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1425 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1426 |  |  |                 if _get_at(current + 1) == 'G': | 
            
                                                                        
                            
            
                                    
            
            
                | 1427 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1428 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1429 |  |  |                     current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1430 |  |  |                 (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1431 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1432 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1433 |  |  |         elif _get_at(current) == 'H': | 
            
                                                                        
                            
            
                                    
            
            
                | 1434 |  |  |             # only keep if first & before vowel or between two vowels | 
            
                                                                        
                            
            
                                    
            
            
                | 1435 |  |  |             if ((((current == 0) or _is_vowel(current - 1)) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1436 |  |  |                  _is_vowel(current + 1))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1437 |  |  |                 (primary, secondary) = _metaph_add('H') | 
            
                                                                        
                            
            
                                    
            
            
                | 1438 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1439 |  |  |             else:  # also takes care of 'HH' | 
            
                                                                        
                            
            
                                    
            
            
                | 1440 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1441 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1442 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1443 |  |  |         elif _get_at(current) == 'J': | 
            
                                                                        
                            
            
                                    
            
            
                | 1444 |  |  |             # obvious spanish, 'jose', 'san jacinto' | 
            
                                                                        
                            
            
                                    
            
            
                | 1445 |  |  |             if _string_at(current, 4, ['JOSE']) or _string_at(0, 4, {'SAN '}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1446 |  |  |                 if ((((current == 0) and (_get_at(current + 4) == ' ')) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1447 |  |  |                      _string_at(0, 4, ['SAN ']))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1448 |  |  |                     (primary, secondary) = _metaph_add('H') | 
            
                                                                        
                            
            
                                    
            
            
                | 1449 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1450 |  |  |                     (primary, secondary) = _metaph_add('J', 'H') | 
            
                                                                        
                            
            
                                    
            
            
                | 1451 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1452 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1453 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1454 |  |  |             elif (current == 0) and not _string_at(current, 4, {'JOSE'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1455 |  |  |                 # Yankelovich/Jankelowicz | 
            
                                                                        
                            
            
                                    
            
            
                | 1456 |  |  |                 (primary, secondary) = _metaph_add('J', 'A') | 
            
                                                                        
                            
            
                                    
            
            
                | 1457 |  |  |             # Spanish pron. of e.g. 'bajador' | 
            
                                                                        
                            
            
                                    
            
            
                | 1458 |  |  |             elif (_is_vowel(current - 1) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1459 |  |  |                   not _slavo_germanic() and | 
            
                                                                        
                            
            
                                    
            
            
                | 1460 |  |  |                   ((_get_at(current + 1) == 'A') or | 
            
                                                                        
                            
            
                                    
            
            
                | 1461 |  |  |                    (_get_at(current + 1) == 'O'))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1462 |  |  |                 (primary, secondary) = _metaph_add('J', 'H') | 
            
                                                                        
                            
            
                                    
            
            
                | 1463 |  |  |             elif current == last: | 
            
                                                                        
                            
            
                                    
            
            
                | 1464 |  |  |                 (primary, secondary) = _metaph_add('J', ' ') | 
            
                                                                        
                            
            
                                    
            
            
                | 1465 |  |  |             elif (not _string_at((current + 1), 1, | 
            
                                                                        
                            
            
                                    
            
            
                | 1466 |  |  |                                  {'L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1467 |  |  |                   not _string_at((current - 1), 1, {'S', 'K', 'L'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1468 |  |  |                 (primary, secondary) = _metaph_add('J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1469 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1470 |  |  |             if _get_at(current + 1) == 'J':  # it could happen! | 
            
                                                                        
                            
            
                                    
            
            
                | 1471 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1472 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1473 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1474 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1475 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1476 |  |  |         elif _get_at(current) == 'K': | 
            
                                                                        
                            
            
                                    
            
            
                | 1477 |  |  |             if _get_at(current + 1) == 'K': | 
            
                                                                        
                            
            
                                    
            
            
                | 1478 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1479 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1480 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1481 |  |  |             (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1482 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1483 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1484 |  |  |         elif _get_at(current) == 'L': | 
            
                                                                        
                            
            
                                    
            
            
                | 1485 |  |  |             if _get_at(current + 1) == 'L': | 
            
                                                                        
                            
            
                                    
            
            
                | 1486 |  |  |                 # Spanish e.g. 'cabrillo', 'gallegos' | 
            
                                                                        
                            
            
                                    
            
            
                | 1487 |  |  |                 if (((current == (length - 3)) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1488 |  |  |                      _string_at((current - 1), 4, {'ILLO', 'ILLA', 'ALLE'})) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1489 |  |  |                         ((_string_at((last - 1), 2, {'AS', 'OS'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1490 |  |  |                           _string_at(last, 1, {'A', 'O'})) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1491 |  |  |                          _string_at((current - 1), 4, {'ALLE'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1492 |  |  |                     (primary, secondary) = _metaph_add('L', ' ') | 
            
                                                                        
                            
            
                                    
            
            
                | 1493 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1494 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1495 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1496 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1497 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1498 |  |  |             (primary, secondary) = _metaph_add('L') | 
            
                                                                        
                            
            
                                    
            
            
                | 1499 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1500 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1501 |  |  |         elif _get_at(current) == 'M': | 
            
                                                                        
                            
            
                                    
            
            
                | 1502 |  |  |             if (((_string_at((current - 1), 3, {'UMB'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1503 |  |  |                   (((current + 1) == last) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1504 |  |  |                    _string_at((current + 2), 2, {'ER'}))) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1505 |  |  |                  # 'dumb', 'thumb' | 
            
                                                                        
                            
            
                                    
            
            
                | 1506 |  |  |                  (_get_at(current + 1) == 'M'))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1507 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1508 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1509 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1510 |  |  |             (primary, secondary) = _metaph_add('M') | 
            
                                                                        
                            
            
                                    
            
            
                | 1511 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1512 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1513 |  |  |         elif _get_at(current) == 'N': | 
            
                                                                        
                            
            
                                    
            
            
                | 1514 |  |  |             if _get_at(current + 1) == 'N': | 
            
                                                                        
                            
            
                                    
            
            
                | 1515 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1516 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1517 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1518 |  |  |             (primary, secondary) = _metaph_add('N') | 
            
                                                                        
                            
            
                                    
            
            
                | 1519 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1520 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1521 |  |  |         elif _get_at(current) == 'Ñ': | 
            
                                                                        
                            
            
                                    
            
            
                | 1522 |  |  |             current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1523 |  |  |             (primary, secondary) = _metaph_add('N') | 
            
                                                                        
                            
            
                                    
            
            
                | 1524 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1525 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1526 |  |  |         elif _get_at(current) == 'P': | 
            
                                                                        
                            
            
                                    
            
            
                | 1527 |  |  |             if _get_at(current + 1) == 'H': | 
            
                                                                        
                            
            
                                    
            
            
                | 1528 |  |  |                 (primary, secondary) = _metaph_add('F') | 
            
                                                                        
                            
            
                                    
            
            
                | 1529 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1530 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1531 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1532 |  |  |             # also account for "campbell", "raspberry" | 
            
                                                                        
                            
            
                                    
            
            
                | 1533 |  |  |             elif _string_at((current + 1), 1, {'P', 'B'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1534 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1535 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1536 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1537 |  |  |             (primary, secondary) = _metaph_add('P') | 
            
                                                                        
                            
            
                                    
            
            
                | 1538 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1539 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1540 |  |  |         elif _get_at(current) == 'Q': | 
            
                                                                        
                            
            
                                    
            
            
                | 1541 |  |  |             if _get_at(current + 1) == 'Q': | 
            
                                                                        
                            
            
                                    
            
            
                | 1542 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1543 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1544 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1545 |  |  |             (primary, secondary) = _metaph_add('K') | 
            
                                                                        
                            
            
                                    
            
            
                | 1546 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1547 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1548 |  |  |         elif _get_at(current) == 'R': | 
            
                                                                        
                            
            
                                    
            
            
                | 1549 |  |  |             # french e.g. 'rogier', but exclude 'hochmeier' | 
            
                                                                        
                            
            
                                    
            
            
                | 1550 |  |  |             if (((current == last) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1551 |  |  |                  not _slavo_germanic() and | 
            
                                                                        
                            
            
                                    
            
            
                | 1552 |  |  |                  _string_at((current - 2), 2, {'IE'}) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1553 |  |  |                  not _string_at((current - 4), 2, {'ME', 'MA'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1554 |  |  |                 (primary, secondary) = _metaph_add('', 'R') | 
            
                                                                        
                            
            
                                    
            
            
                | 1555 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1556 |  |  |                 (primary, secondary) = _metaph_add('R') | 
            
                                                                        
                            
            
                                    
            
            
                | 1557 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1558 |  |  |             if _get_at(current + 1) == 'R': | 
            
                                                                        
                            
            
                                    
            
            
                | 1559 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1560 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1561 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1562 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1563 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1564 |  |  |         elif _get_at(current) == 'S': | 
            
                                                                        
                            
            
                                    
            
            
                | 1565 |  |  |             # special cases 'island', 'isle', 'carlisle', 'carlysle' | 
            
                                                                        
                            
            
                                    
            
            
                | 1566 |  |  |             if _string_at((current - 1), 3, {'ISL', 'YSL'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1567 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1568 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1569 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1570 |  |  |             # special case 'sugar-' | 
            
                                                                        
                            
            
                                    
            
            
                | 1571 |  |  |             elif (current == 0) and _string_at(current, 5, {'SUGAR'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1572 |  |  |                 (primary, secondary) = _metaph_add('X', 'S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1573 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1574 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1575 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1576 |  |  |             elif _string_at(current, 2, {'SH'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1577 |  |  |                 # Germanic | 
            
                                                                        
                            
            
                                    
            
            
                | 1578 |  |  |                 if _string_at((current + 1), 4, | 
            
                                                                        
                            
            
                                    
            
            
                | 1579 |  |  |                               {'HEIM', 'HOEK', 'HOLM', 'HOLZ'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1580 |  |  |                     (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1581 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1582 |  |  |                     (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1583 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1584 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1585 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1586 |  |  |             # Italian & Armenian | 
            
                                                                        
                            
            
                                    
            
            
                | 1587 |  |  |             elif (_string_at(current, 3, {'SIO', 'SIA'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1588 |  |  |                   _string_at(current, 4, {'SIAN'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1589 |  |  |                 if not _slavo_germanic(): | 
            
                                                                        
                            
            
                                    
            
            
                | 1590 |  |  |                     (primary, secondary) = _metaph_add('S', 'X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1591 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1592 |  |  |                     (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1593 |  |  |                 current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1594 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1595 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1596 |  |  |             # German & anglicisations, e.g. 'smith' match 'schmidt', | 
            
                                                                        
                            
            
                                    
            
            
                | 1597 |  |  |             #                               'snider' match 'schneider' | 
            
                                                                        
                            
            
                                    
            
            
                | 1598 |  |  |             # also, -sz- in Slavic language although in Hungarian it is | 
            
                                                                        
                            
            
                                    
            
            
                | 1599 |  |  |             #       pronounced 's' | 
            
                                                                        
                            
            
                                    
            
            
                | 1600 |  |  |             elif (((current == 0) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1601 |  |  |                    _string_at((current + 1), 1, {'M', 'N', 'L', 'W'})) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1602 |  |  |                   _string_at((current + 1), 1, {'Z'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1603 |  |  |                 (primary, secondary) = _metaph_add('S', 'X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1604 |  |  |                 if _string_at((current + 1), 1, {'Z'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1605 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1606 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1607 |  |  |                     current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1608 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1609 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1610 |  |  |             elif _string_at(current, 2, {'SC'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1611 |  |  |                 # Schlesinger's rule | 
            
                                                                        
                            
            
                                    
            
            
                | 1612 |  |  |                 if _get_at(current + 2) == 'H': | 
            
                                                                        
                            
            
                                    
            
            
                | 1613 |  |  |                     # Dutch origin, e.g. 'school', 'schooner' | 
            
                                                                        
                            
            
                                    
            
            
                | 1614 |  |  |                     if _string_at((current + 3), 2, | 
            
                                                                        
                            
            
                                    
            
            
                | 1615 |  |  |                                   {'OO', 'ER', 'EN', 'UY', 'ED', 'EM'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1616 |  |  |                         # 'schermerhorn', 'schenker' | 
            
                                                                        
                            
            
                                    
            
            
                | 1617 |  |  |                         if _string_at((current + 3), 2, {'ER', 'EN'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1618 |  |  |                             (primary, secondary) = _metaph_add('X', 'SK') | 
            
                                                                        
                            
            
                                    
            
            
                | 1619 |  |  |                         else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1620 |  |  |                             (primary, secondary) = _metaph_add('SK') | 
            
                                                                        
                            
            
                                    
            
            
                | 1621 |  |  |                         current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1622 |  |  |                         continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1623 |  |  |                     else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1624 |  |  |                         if (((current == 0) and not _is_vowel(3) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1625 |  |  |                              (_get_at(3) != 'W'))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1626 |  |  |                             (primary, secondary) = _metaph_add('X', 'S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1627 |  |  |                         else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1628 |  |  |                             (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1629 |  |  |                         current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1630 |  |  |                         continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1631 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1632 |  |  |                 elif _string_at((current + 2), 1, {'I', 'E', 'Y'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1633 |  |  |                     (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1634 |  |  |                     current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1635 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1636 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1637 |  |  |                 # else | 
            
                                                                        
                            
            
                                    
            
            
                | 1638 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1639 |  |  |                     (primary, secondary) = _metaph_add('SK') | 
            
                                                                        
                            
            
                                    
            
            
                | 1640 |  |  |                     current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1641 |  |  |                     continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1642 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1643 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1644 |  |  |                 # French, e.g. 'resnais', 'artois' | 
            
                                                                        
                            
            
                                    
            
            
                | 1645 |  |  |                 if (current == last) and _string_at((current - 2), 2, | 
            
                                                                        
                            
            
                                    
            
            
                | 1646 |  |  |                                                     {'AI', 'OI'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1647 |  |  |                     (primary, secondary) = _metaph_add('', 'S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1648 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1649 |  |  |                     (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1650 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1651 |  |  |                 if _string_at((current + 1), 1, {'S', 'Z'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1652 |  |  |                     current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1653 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1654 |  |  |                     current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1655 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1656 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1657 |  |  |         elif _get_at(current) == 'T': | 
            
                                                                        
                            
            
                                    
            
            
                | 1658 |  |  |             if _string_at(current, 4, {'TION'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1659 |  |  |                 (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1660 |  |  |                 current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1661 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1662 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1663 |  |  |             elif _string_at(current, 3, {'TIA', 'TCH'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1664 |  |  |                 (primary, secondary) = _metaph_add('X') | 
            
                                                                        
                            
            
                                    
            
            
                | 1665 |  |  |                 current += 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 1666 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1667 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1668 |  |  |             elif (_string_at(current, 2, {'TH'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1669 |  |  |                   _string_at(current, 3, {'TTH'})): | 
            
                                                                        
                            
            
                                    
            
            
                | 1670 |  |  |                 # special case 'thomas', 'thames' or Germanic | 
            
                                                                        
                            
            
                                    
            
            
                | 1671 |  |  |                 if ((_string_at((current + 2), 2, {'OM', 'AM'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1672 |  |  |                      _string_at(0, 4, {'VAN ', 'VON '}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1673 |  |  |                      _string_at(0, 3, {'SCH'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1674 |  |  |                     (primary, secondary) = _metaph_add('T') | 
            
                                                                        
                            
            
                                    
            
            
                | 1675 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1676 |  |  |                     (primary, secondary) = _metaph_add('0', 'T') | 
            
                                                                        
                            
            
                                    
            
            
                | 1677 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1678 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1679 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1680 |  |  |             elif _string_at((current + 1), 1, {'T', 'D'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1681 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1682 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1683 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1684 |  |  |             (primary, secondary) = _metaph_add('T') | 
            
                                                                        
                            
            
                                    
            
            
                | 1685 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1686 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1687 |  |  |         elif _get_at(current) == 'V': | 
            
                                                                        
                            
            
                                    
            
            
                | 1688 |  |  |             if _get_at(current + 1) == 'V': | 
            
                                                                        
                            
            
                                    
            
            
                | 1689 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1690 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1691 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1692 |  |  |             (primary, secondary) = _metaph_add('F') | 
            
                                                                        
                            
            
                                    
            
            
                | 1693 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1694 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1695 |  |  |         elif _get_at(current) == 'W': | 
            
                                                                        
                            
            
                                    
            
            
                | 1696 |  |  |             # can also be in middle of word | 
            
                                                                        
                            
            
                                    
            
            
                | 1697 |  |  |             if _string_at(current, 2, {'WR'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1698 |  |  |                 (primary, secondary) = _metaph_add('R') | 
            
                                                                        
                            
            
                                    
            
            
                | 1699 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1700 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1701 |  |  |             elif ((current == 0) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1702 |  |  |                   (_is_vowel(current + 1) or _string_at(current, 2, {'WH'}))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1703 |  |  |                 # Wasserman should match Vasserman | 
            
                                                                        
                            
            
                                    
            
            
                | 1704 |  |  |                 if _is_vowel(current + 1): | 
            
                                                                        
                            
            
                                    
            
            
                | 1705 |  |  |                     (primary, secondary) = _metaph_add('A', 'F') | 
            
                                                                        
                            
            
                                    
            
            
                | 1706 |  |  |                 else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1707 |  |  |                     # need Uomo to match Womo | 
            
                                                                        
                            
            
                                    
            
            
                | 1708 |  |  |                     (primary, secondary) = _metaph_add('A') | 
            
                                                                        
                            
            
                                    
            
            
                | 1709 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1710 |  |  |             # Arnow should match Arnoff | 
            
                                                                        
                            
            
                                    
            
            
                | 1711 |  |  |             if ((((current == last) and _is_vowel(current - 1)) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1712 |  |  |                  _string_at((current - 1), 5, | 
            
                                                                        
                            
            
                                    
            
            
                | 1713 |  |  |                             {'EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1714 |  |  |                  _string_at(0, 3, ['SCH']))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1715 |  |  |                 (primary, secondary) = _metaph_add('', 'F') | 
            
                                                                        
                            
            
                                    
            
            
                | 1716 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1717 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1718 |  |  |             # Polish e.g. 'filipowicz' | 
            
                                                                        
                            
            
                                    
            
            
                | 1719 |  |  |             elif _string_at(current, 4, {'WICZ', 'WITZ'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1720 |  |  |                 (primary, secondary) = _metaph_add('TS', 'FX') | 
            
                                                                        
                            
            
                                    
            
            
                | 1721 |  |  |                 current += 4 | 
            
                                                                        
                            
            
                                    
            
            
                | 1722 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1723 |  |  |             # else skip it | 
            
                                                                        
                            
            
                                    
            
            
                | 1724 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1725 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1726 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1727 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1728 |  |  |         elif _get_at(current) == 'X': | 
            
                                                                        
                            
            
                                    
            
            
                | 1729 |  |  |             # French e.g. breaux | 
            
                                                                        
                            
            
                                    
            
            
                | 1730 |  |  |             if (not ((current == last) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1731 |  |  |                      (_string_at((current - 3), 3, {'IAU', 'EAU'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1732 |  |  |                       _string_at((current - 2), 2, {'AU', 'OU'})))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1733 |  |  |                 (primary, secondary) = _metaph_add('KS') | 
            
                                                                        
                            
            
                                    
            
            
                | 1734 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1735 |  |  |             if _string_at((current + 1), 1, {'C', 'X'}): | 
            
                                                                        
                            
            
                                    
            
            
                | 1736 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1737 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1738 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1739 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1740 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1741 |  |  |         elif _get_at(current) == 'Z': | 
            
                                                                        
                            
            
                                    
            
            
                | 1742 |  |  |             # Chinese Pinyin e.g. 'zhao' | 
            
                                                                        
                            
            
                                    
            
            
                | 1743 |  |  |             if _get_at(current + 1) == 'H': | 
            
                                                                        
                            
            
                                    
            
            
                | 1744 |  |  |                 (primary, secondary) = _metaph_add('J') | 
            
                                                                        
                            
            
                                    
            
            
                | 1745 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1746 |  |  |                 continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1747 |  |  |             elif (_string_at((current + 1), 2, {'ZO', 'ZI', 'ZA'}) or | 
            
                                                                        
                            
            
                                    
            
            
                | 1748 |  |  |                   (_slavo_germanic() and ((current > 0) and | 
            
                                                                        
                            
            
                                    
            
            
                | 1749 |  |  |                                           _get_at(current - 1) != 'T'))): | 
            
                                                                        
                            
            
                                    
            
            
                | 1750 |  |  |                 (primary, secondary) = _metaph_add('S', 'TS') | 
            
                                                                        
                            
            
                                    
            
            
                | 1751 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1752 |  |  |                 (primary, secondary) = _metaph_add('S') | 
            
                                                                        
                            
            
                                    
            
            
                | 1753 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1754 |  |  |             if _get_at(current + 1) == 'Z': | 
            
                                                                        
                            
            
                                    
            
            
                | 1755 |  |  |                 current += 2 | 
            
                                                                        
                            
            
                                    
            
            
                | 1756 |  |  |             else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1757 |  |  |                 current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1758 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 1759 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1760 |  |  |         else: | 
            
                                                                        
                            
            
                                    
            
            
                | 1761 |  |  |             current += 1 | 
            
                                                                        
                            
            
                                    
            
            
                | 1762 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1763 |  |  |     if maxlength and maxlength < _INFINITY: | 
            
                                                                        
                            
            
                                    
            
            
                | 1764 |  |  |         primary = primary[:maxlength] | 
            
                                                                        
                            
            
                                    
            
            
                | 1765 |  |  |         secondary = secondary[:maxlength] | 
            
                                                                        
                            
            
                                    
            
            
                | 1766 |  |  |     if primary == secondary: | 
            
                                                                        
                            
            
                                    
            
            
                | 1767 |  |  |         secondary = '' | 
            
                                                                        
                            
            
                                    
            
            
                | 1768 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 1769 |  |  |     return (primary, secondary) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1770 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1771 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1772 |  |  | def caverphone(word, version=2): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1773 |  |  |     """Return the Caverphone code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1774 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1775 |  |  |     A description of version 1 of the algorithm can be found in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1776 |  |  |     :cite:`Hood:2002`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1777 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1778 |  |  |     A description of version 2 of the algorithm can be found in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1779 |  |  |     :cite:`Hood:2004`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1780 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1781 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1782 |  |  |     :param int version: the version of Caverphone to employ for encoding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1783 |  |  |         (defaults to 2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1784 |  |  |     :returns: the Caverphone value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1785 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1786 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1787 |  |  |     >>> caverphone('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1788 |  |  |     'KRSTFA1111' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1789 |  |  |     >>> caverphone('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1790 |  |  |     'NA11111111' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1791 |  |  |     >>> caverphone('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1792 |  |  |     'SMT1111111' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1793 |  |  |     >>> caverphone('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1794 |  |  |     'SKMT111111' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1795 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1796 |  |  |     >>> caverphone('Christopher', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1797 |  |  |     'KRSTF1' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1798 |  |  |     >>> caverphone('Niall', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1799 |  |  |     'N11111' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1800 |  |  |     >>> caverphone('Smith', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1801 |  |  |     'SMT111' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1802 |  |  |     >>> caverphone('Schmidt', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1803 |  |  |     'SKMT11' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1804 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1805 |  |  |     _vowels = {'a', 'e', 'i', 'o', 'u'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1806 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1807 |  |  |     word = word.lower() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1808 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1809 |  |  |                    {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1810 |  |  |                     'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1811 |  |  |                     'y', 'z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1812 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1813 |  |  |     def _squeeze_replace(word, char, new_char): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1814 |  |  |         """Convert strings of char in word to one instance of new_char.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1815 |  |  |         while char * 2 in word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1816 |  |  |             word = word.replace(char * 2, char) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1817 |  |  |         return word.replace(char, new_char) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1818 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1819 |  |  |     # the main replacement algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1820 |  |  |     if version != 1 and word[-1:] == 'e': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1821 |  |  |         word = word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1822 |  |  |     if word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1823 |  |  |         if word[:5] == 'cough': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1824 |  |  |             word = 'cou2f'+word[5:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1825 |  |  |         if word[:5] == 'rough': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1826 |  |  |             word = 'rou2f'+word[5:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1827 |  |  |         if word[:5] == 'tough': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1828 |  |  |             word = 'tou2f'+word[5:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1829 |  |  |         if word[:6] == 'enough': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1830 |  |  |             word = 'enou2f'+word[6:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1831 |  |  |         if version != 1 and word[:6] == 'trough': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1832 |  |  |             word = 'trou2f'+word[6:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1833 |  |  |         if word[:2] == 'gn': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1834 |  |  |             word = '2n'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1835 |  |  |         if word[-2:] == 'mb': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1836 |  |  |             word = word[:-1]+'2' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1837 |  |  |         word = word.replace('cq', '2q') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1838 |  |  |         word = word.replace('ci', 'si') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1839 |  |  |         word = word.replace('ce', 'se') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1840 |  |  |         word = word.replace('cy', 'sy') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1841 |  |  |         word = word.replace('tch', '2ch') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1842 |  |  |         word = word.replace('c', 'k') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1843 |  |  |         word = word.replace('q', 'k') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1844 |  |  |         word = word.replace('x', 'k') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1845 |  |  |         word = word.replace('v', 'f') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1846 |  |  |         word = word.replace('dg', '2g') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1847 |  |  |         word = word.replace('tio', 'sio') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1848 |  |  |         word = word.replace('tia', 'sia') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1849 |  |  |         word = word.replace('d', 't') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1850 |  |  |         word = word.replace('ph', 'fh') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1851 |  |  |         word = word.replace('b', 'p') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1852 |  |  |         word = word.replace('sh', 's2') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1853 |  |  |         word = word.replace('z', 's') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1854 |  |  |         if word[0] in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1855 |  |  |             word = 'A'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1856 |  |  |         word = word.replace('a', '3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1857 |  |  |         word = word.replace('e', '3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1858 |  |  |         word = word.replace('i', '3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1859 |  |  |         word = word.replace('o', '3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1860 |  |  |         word = word.replace('u', '3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1861 |  |  |         if version != 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1862 |  |  |             word = word.replace('j', 'y') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1863 |  |  |             if word[:2] == 'y3': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1864 |  |  |                 word = 'Y3'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1865 |  |  |             if word[:1] == 'y': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1866 |  |  |                 word = 'A'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1867 |  |  |             word = word.replace('y', '3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1868 |  |  |         word = word.replace('3gh3', '3kh3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1869 |  |  |         word = word.replace('gh', '22') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1870 |  |  |         word = word.replace('g', 'k') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1871 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1872 |  |  |         word = _squeeze_replace(word, 's', 'S') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1873 |  |  |         word = _squeeze_replace(word, 't', 'T') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1874 |  |  |         word = _squeeze_replace(word, 'p', 'P') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1875 |  |  |         word = _squeeze_replace(word, 'k', 'K') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1876 |  |  |         word = _squeeze_replace(word, 'f', 'F') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1877 |  |  |         word = _squeeze_replace(word, 'm', 'M') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1878 |  |  |         word = _squeeze_replace(word, 'n', 'N') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1879 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1880 |  |  |         word = word.replace('w3', 'W3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1881 |  |  |         if version == 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1882 |  |  |             word = word.replace('wy', 'Wy') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1883 |  |  |         word = word.replace('wh3', 'Wh3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1884 |  |  |         if version == 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1885 |  |  |             word = word.replace('why', 'Why') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1886 |  |  |         if version != 1 and word[-1:] == 'w': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1887 |  |  |             word = word[:-1]+'3' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1888 |  |  |         word = word.replace('w', '2') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1889 |  |  |         if word[:1] == 'h': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1890 |  |  |             word = 'A'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1891 |  |  |         word = word.replace('h', '2') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1892 |  |  |         word = word.replace('r3', 'R3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1893 |  |  |         if version == 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1894 |  |  |             word = word.replace('ry', 'Ry') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1895 |  |  |         if version != 1 and word[-1:] == 'r': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1896 |  |  |             word = word[:-1]+'3' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1897 |  |  |         word = word.replace('r', '2') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1898 |  |  |         word = word.replace('l3', 'L3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1899 |  |  |         if version == 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1900 |  |  |             word = word.replace('ly', 'Ly') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1901 |  |  |         if version != 1 and word[-1:] == 'l': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1902 |  |  |             word = word[:-1]+'3' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1903 |  |  |         word = word.replace('l', '2') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1904 |  |  |         if version == 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1905 |  |  |             word = word.replace('j', 'y') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1906 |  |  |             word = word.replace('y3', 'Y3') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1907 |  |  |             word = word.replace('y', '2') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1908 |  |  |         word = word.replace('2', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1909 |  |  |         if version != 1 and word[-1:] == '3': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1910 |  |  |             word = word[:-1]+'A' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1911 |  |  |         word = word.replace('3', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1912 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1913 |  |  |     # pad with 1s, then extract the necessary length of code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1914 |  |  |     word = word+'1'*10 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1915 |  |  |     if version != 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1916 |  |  |         word = word[:10] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1917 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1918 |  |  |         word = word[:6] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1919 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1920 |  |  |     return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1921 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1922 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1923 |  |  | def alpha_sis(word, maxlength=14): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1924 |  |  |     """Return the IBM Alpha Search Inquiry System code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1925 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1926 |  |  |     The Alpha Search Inquiry System code is defined in :cite:`IBM:1973`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1927 |  |  |     This implementation is based on the description in :cite:`Moore:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1928 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1929 |  |  |     A collection is necessary since there can be multiple values for a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1930 |  |  |     single word. But the collection must be ordered since the first value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1931 |  |  |     is the primary coding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1932 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1933 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1934 |  |  |     :param int maxlength: the length of the code returned (defaults to 14) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1935 |  |  |     :returns: the Alpha SIS value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1936 |  |  |     :rtype: tuple | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1937 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1938 |  |  |     >>> alpha_sis('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1939 |  |  |     ('06401840000000', '07040184000000', '04018400000000') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1940 |  |  |     >>> alpha_sis('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1941 |  |  |     ('02500000000000',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1942 |  |  |     >>> alpha_sis('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1943 |  |  |     ('03100000000000',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1944 |  |  |     >>> alpha_sis('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1945 |  |  |     ('06310000000000',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1946 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1947 |  |  |     _alpha_sis_initials = {'GF': '08', 'GM': '03', 'GN': '02', 'KN': '02', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1948 |  |  |                            'PF': '08', 'PN': '02', 'PS': '00', 'WR': '04', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1949 |  |  |                            'A': '1', 'E': '1', 'H': '2', 'I': '1', 'J': '3', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1950 |  |  |                            'O': '1', 'U': '1', 'W': '4', 'Y': '5'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1951 |  |  |     _alpha_sis_initials_order = ('GF', 'GM', 'GN', 'KN', 'PF', 'PN', 'PS', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1952 |  |  |                                  'WR', 'A', 'E', 'H', 'I', 'J', 'O', 'U', 'W', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1953 |  |  |                                  'Y') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1954 |  |  |     _alpha_sis_basic = {'SCH': '6', 'CZ': ('70', '6', '0'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1955 |  |  |                         'CH': ('6', '70', '0'), 'CK': ('7', '6'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1956 |  |  |                         'DS': ('0', '10'), 'DZ': ('0', '10'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1957 |  |  |                         'TS': ('0', '10'), 'TZ': ('0', '10'), 'CI': '0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1958 |  |  |                         'CY': '0', 'CE': '0', 'SH': '6', 'DG': '7', 'PH': '8', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1959 |  |  |                         'C': ('7', '6'), 'K': ('7', '6'), 'Z': '0', 'S': '0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1960 |  |  |                         'D': '1', 'T': '1', 'N': '2', 'M': '3', 'R': '4', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1961 |  |  |                         'L': '5', 'J': '6', 'G': '7', 'Q': '7', 'X': '7', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1962 |  |  |                         'F': '8', 'V': '8', 'B': '9', 'P': '9'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1963 |  |  |     _alpha_sis_basic_order = ('SCH', 'CZ', 'CH', 'CK', 'DS', 'DZ', 'TS', 'TZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1964 |  |  |                               'CI', 'CY', 'CE', 'SH', 'DG', 'PH', 'C', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1965 |  |  |                               'Z', 'S', 'D', 'T', 'N', 'M', 'R', 'L', 'J', 'C', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1966 |  |  |                               'G', 'K', 'Q', 'X', 'F', 'V', 'B', 'P') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1967 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1968 |  |  |     alpha = [''] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1969 |  |  |     pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1970 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1971 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1972 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1973 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1974 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1975 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1976 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1977 |  |  |     # Clamp maxlength to [4, 64] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1978 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1979 |  |  |         maxlength = min(max(4, maxlength), 64) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1980 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1981 |  |  |         maxlength = 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1982 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1983 |  |  |     # Do special processing for initial substrings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1984 |  |  |     for k in _alpha_sis_initials_order: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1985 |  |  |         if word.startswith(k): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1986 |  |  |             alpha[0] += _alpha_sis_initials[k] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1987 |  |  |             pos += len(k) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1988 |  |  |             break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1989 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1990 |  |  |     # Add a '0' if alpha is still empty | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1991 |  |  |     if not alpha[0]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1992 |  |  |         alpha[0] += '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1993 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1994 |  |  |     # Whether or not any special initial codes were encoded, iterate | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1995 |  |  |     # through the length of the word in the main encoding loop | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1996 |  |  |     while pos < len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1997 |  |  |         origpos = pos | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1998 |  |  |         for k in _alpha_sis_basic_order: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1999 |  |  |             if word[pos:].startswith(k): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2000 |  |  |                 if isinstance(_alpha_sis_basic[k], tuple): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2001 |  |  |                     newalpha = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2002 |  |  |                     for i in range(len(_alpha_sis_basic[k])): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2003 |  |  |                         newalpha += [_ + _alpha_sis_basic[k][i] for _ in alpha] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2004 |  |  |                     alpha = newalpha | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2005 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2006 |  |  |                     alpha = [_ + _alpha_sis_basic[k] for _ in alpha] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2007 |  |  |                 pos += len(k) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2008 |  |  |                 break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2009 |  |  |         if pos == origpos: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2010 |  |  |             alpha = [_ + '_' for _ in alpha] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2011 |  |  |             pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2012 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2013 |  |  |     # Trim doublets and placeholders | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2014 |  |  |     for i in range(len(alpha)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2015 |  |  |         pos = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2016 |  |  |         while pos < len(alpha[i]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2017 |  |  |             if alpha[i][pos] == alpha[i][pos-1]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2018 |  |  |                 alpha[i] = alpha[i][:pos]+alpha[i][pos+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2019 |  |  |             pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2020 |  |  |     alpha = (_.replace('_', '') for _ in alpha) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2021 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2022 |  |  |     # Trim codes and return tuple | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2023 |  |  |     alpha = ((_ + ('0'*maxlength))[:maxlength] for _ in alpha) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2024 |  |  |     return tuple(alpha) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2025 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2026 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def fuzzy_soundex(word, maxlength=5, zero_pad=True):
                    """Return the Fuzzy Soundex code for a word.

                    Fuzzy Soundex is an algorithm derived from Soundex, defined in
                    :cite:`Holmes:2002`.

                    :param str word: the word to transform
                    :param int maxlength: the length of the code returned (defaults to 5);
                        the value is clamped to the range [4, 64], and None is treated
                        as 64
                    :param bool zero_pad: pad the end of the return value with 0s to achieve
                        a maxlength string
                    :returns: the Fuzzy Soundex value
                    :rtype: str

                    >>> fuzzy_soundex('Christopher')
                    'K6931'
                    >>> fuzzy_soundex('Niall')
                    'N4000'
                    >>> fuzzy_soundex('Smith')
                    'S5300'
                    """
                    # Letter -> code table; letters mapped to '-' are dropped from the code.
                    letter_codes = {ord(letter): digit for letter, digit in
                                    zip('ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                        '0193017-07745501769301-7-9')}

                    # Two-letter word beginnings and what they are rewritten to.
                    initial_rewrites = {
                        'CS': 'SS', 'CZ': 'SS', 'TS': 'SS', 'TZ': 'SS',
                        'GN': 'NN',
                        'HR': 'RR', 'WR': 'RR',
                        'HW': 'WW',
                        'KN': 'NN', 'NG': 'NN',
                    }

                    # Word endings, tested in this order; only the first match is applied.
                    final_rewrites = (
                        ('CH', 'KK'),
                        ('NT', 'TT'),
                        ('RT', 'RR'),
                        ('RDT', 'RR'),
                    )

                    # Substitutions applied anywhere in the word. The order is significant
                    # because earlier rewrites change what later patterns can match.
                    inner_rewrites = (
                        ('CA', 'KA'),
                        ('CC', 'KK'),
                        ('CK', 'KK'),
                        ('CE', 'SE'),
                        ('CHL', 'KL'),
                        ('CL', 'KL'),
                        ('CHR', 'KR'),
                        ('CR', 'KR'),
                        ('CI', 'SI'),
                        ('CO', 'KO'),
                        ('CU', 'KU'),
                        ('CY', 'SY'),
                        ('DG', 'GG'),
                        ('GH', 'HH'),
                        ('MAC', 'MK'),
                        ('MC', 'MK'),
                        ('NST', 'NSS'),
                        ('PF', 'FF'),
                        ('PH', 'FF'),
                        ('SCH', 'SSS'),
                        ('TIO', 'SIO'),
                        ('TIA', 'SIO'),
                        ('TCH', 'CHH'),
                    )

                    word = normalize('NFKD', text_type(word.upper()))
                    word = word.replace('ß', 'SS')

                    # Clamp maxlength to [4, 64]; None selects the upper bound.
                    if maxlength is None:
                        maxlength = 64
                    else:
                        maxlength = min(max(4, maxlength), 64)

                    # An empty word has no initial letter, so it is coded as zero(s).
                    if not word:
                        return '0' * maxlength if zero_pad else '0'

                    head = word[:2]
                    if head in initial_rewrites:
                        word = initial_rewrites[head] + word[2:]

                    for ending, replacement in final_rewrites:
                        if word.endswith(ending):
                            word = word[:-len(ending)] + replacement
                            break

                    for cluster, replacement in inner_rewrites:
                        word = word.replace(cluster, replacement)

                    digits = word.translate(letter_codes).replace('-', '')

                    # remove repeating characters
                    digits = _delete_consecutive_repeats(digits)

                    # The code starts with the word's (rewritten) first letter. H, W and Y
                    # translate to '-' and have already been dropped, so nothing needs to
                    # be cut; for any other initial, its own leading code is replaced by
                    # the letter.
                    initial = word[0]
                    if initial in ('H', 'W', 'Y'):
                        code = initial + digits
                    else:
                        code = initial + digits[1:]

                    code = code.replace('0', '')

                    if zero_pad:
                        code += '0' * maxlength

                    return code[:maxlength]
            
                                                                                                            
                            
            
                                    
            
            
                | 2128 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2129 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2130 |  |  | def phonex(word, maxlength=4, zero_pad=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2131 |  |  |     """Return the Phonex code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2133 |  |  |     Phonex is an algorithm derived from Soundex, defined in :cite:`Lait:1996`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2135 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2136 |  |  |     :param int maxlength: the length of the code returned (defaults to 4) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2137 |  |  |     :param bool zero_pad: pad the end of the return value with 0s to achieve | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2138 |  |  |         a maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2139 |  |  |     :returns: the Phonex value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2140 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2141 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2142 |  |  |     >>> phonex('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2143 |  |  |     'C623' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2144 |  |  |     >>> phonex('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2145 |  |  |     'N400' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2146 |  |  |     >>> phonex('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2147 |  |  |     'S253' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2148 |  |  |     >>> phonex('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2149 |  |  |     'S530' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2150 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2151 |  |  |     name = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2152 |  |  |     name = name.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2153 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2154 |  |  |     # Clamp maxlength to [4, 64] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2155 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2156 |  |  |         maxlength = min(max(4, maxlength), 64) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2157 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2158 |  |  |         maxlength = 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2159 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2160 |  |  |     name_code = last = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2161 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2162 |  |  |     # Deletions effected by replacing with next letter which | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2163 |  |  |     # will be ignored due to duplicate handling of Soundex code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2164 |  |  |     # This is faster than 'moving' all subsequent letters. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2165 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2166 |  |  |     # Remove any trailing Ss | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2167 |  |  |     while name[-1:] == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2168 |  |  |         name = name[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2169 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2170 |  |  |     # Phonetic equivalents of first 2 characters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2171 |  |  |     # Works since duplicate letters are ignored | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2172 |  |  |     if name[:2] == 'KN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2173 |  |  |         name = 'N' + name[2:]  # KN.. == N.. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2174 |  |  |     elif name[:2] == 'PH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2175 |  |  |         name = 'F' + name[2:]  # PH.. == F.. (H ignored anyway) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2176 |  |  |     elif name[:2] == 'WR': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2177 |  |  |         name = 'R' + name[2:]  # WR.. == R.. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2178 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2179 |  |  |     if name: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2180 |  |  |         # Special case, ignore H first letter (subsequent Hs ignored anyway) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2181 |  |  |         # Works since duplicate letters are ignored | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2182 |  |  |         if name[0] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2183 |  |  |             name = name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2184 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2185 |  |  |     if name: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2186 |  |  |         # Phonetic equivalents of first character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2187 |  |  |         if name[0] in {'A', 'E', 'I', 'O', 'U', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2188 |  |  |             name = 'A' + name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2189 |  |  |         elif name[0] in {'B', 'P'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2190 |  |  |             name = 'B' + name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2191 |  |  |         elif name[0] in {'V', 'F'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2192 |  |  |             name = 'F' + name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2193 |  |  |         elif name[0] in {'C', 'K', 'Q'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2194 |  |  |             name = 'C' + name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2195 |  |  |         elif name[0] in {'G', 'J'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2196 |  |  |             name = 'G' + name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2197 |  |  |         elif name[0] in {'S', 'Z'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2198 |  |  |             name = 'S' + name[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2199 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2200 |  |  |         name_code = last = name[0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2201 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2202 |  |  |     # MODIFIED SOUNDEX CODE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2203 |  |  |     for i in range(1, len(name)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2204 |  |  |         code = '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2205 |  |  |         if name[i] in {'B', 'F', 'P', 'V'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2206 |  |  |             code = '1' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2207 |  |  |         elif name[i] in {'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2208 |  |  |             code = '2' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2209 |  |  |         elif name[i] in {'D', 'T'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2210 |  |  |             if name[i+1:i+2] != 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2211 |  |  |                 code = '3' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2212 |  |  |         elif name[i] == 'L': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2213 |  |  |             if (name[i+1:i+2] in {'A', 'E', 'I', 'O', 'U', 'Y'} or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2214 |  |  |                     i+1 == len(name)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2215 |  |  |                 code = '4' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2216 |  |  |         elif name[i] in {'M', 'N'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2217 |  |  |             if name[i+1:i+2] in {'D', 'G'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2218 |  |  |                 name = name[:i+1] + name[i] + name[i+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2219 |  |  |             code = '5' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2220 |  |  |         elif name[i] == 'R': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2221 |  |  |             if (name[i+1:i+2] in {'A', 'E', 'I', 'O', 'U', 'Y'} or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2222 |  |  |                     i+1 == len(name)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2223 |  |  |                 code = '6' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2224 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2225 |  |  |         if code != last and code != '0' and i != 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2226 |  |  |             name_code += code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2227 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2228 |  |  |         last = name_code[-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2229 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2230 |  |  |     if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2231 |  |  |         name_code += '0' * maxlength | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2232 |  |  |     if not name_code: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2233 |  |  |         name_code = '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2234 |  |  |     return name_code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2235 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2236 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def phonem(word):
                    """Compute the Phonem encoding of a word.

                    The Phonem algorithm is defined in :cite:`Wilde:1988`. This
                    implementation follows the Perl version documented at
                    :cite:`Wilz:2005` and adds some enhancements presented in the Java
                    port at :cite:`dcm4che:2011`.

                    Phonem is intended chiefly for German names/words.

                    The word is upper-cased and NFC-normalized, a fixed sequence of
                    digraph substitutions is applied in order, single characters are
                    mapped through a translation table, consecutive repeats are
                    collapsed, and finally only characters of the output alphabet are
                    kept.

                    :param str word: the word to transform
                    :returns: the Phonem value
                    :rtype: str

                    >>> phonem('Christopher')
                    'CRYSDOVR'
                    >>> phonem('Niall')
                    'NYAL'
                    >>> phonem('Smith')
                    'SMYD'
                    >>> phonem('Schmidt')
                    'CMYD'
                    """
                    # Digraph rewrites; applied sequentially, so the order is significant.
                    digraph_rules = (
                        ('SC', 'C'), ('SZ', 'C'), ('CZ', 'C'),
                        ('TZ', 'C'), ('TS', 'C'), ('KS', 'X'),
                        ('PF', 'V'), ('QU', 'KW'), ('PH', 'V'),
                        ('UE', 'Y'), ('AE', 'E'), ('OE', 'Ö'),
                        ('EI', 'AY'), ('EY', 'AY'), ('EU', 'OY'),
                        ('AU', 'A§'), ('OU', '§'),
                    )

                    # Position-wise single-character mapping: the n-th character of
                    # source_chars is rewritten to the n-th character of target_chars.
                    source_chars = 'ZKGQÇÑßFWPTÁÀÂÃÅÄÆÉÈÊËIJÌÍÎÏÜݧÚÙÛÔÒÓÕØ'
                    target_chars = 'CCCCCNSVVBDAAAAAEEEEEEYYYYYYYYUUUUOOOOÖ'
                    char_map = {ord(src): tar
                                for src, tar in zip(source_chars, target_chars)}

                    # Only these characters may appear in the returned code.
                    output_alphabet = {'A', 'B', 'C', 'D', 'L', 'M', 'N', 'O', 'R', 'S',
                                       'U', 'V', 'W', 'X', 'Y', 'Ö'}

                    code = normalize('NFC', text_type(word.upper()))
                    for old, new in digraph_rules:
                        code = code.replace(old, new)
                    code = code.translate(char_map)
                    code = _delete_consecutive_repeats(code)

                    return ''.join(char for char in code if char in output_alphabet)
            
                                                                                                            
                            
            
                                    
            
            
                | 2280 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2281 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2282 |  |  | def phonix(word, maxlength=4, zero_pad=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2283 |  |  |     """Return the Phonix code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2284 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2285 |  |  |     Phonix is a Soundex-like algorithm defined in :cite:`Gadd:1990`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2286 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2287 |  |  |     This implementation is based on: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2288 |  |  |     - :cite:`Pfeifer:2000` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2289 |  |  |     - :cite:`Christen:2011` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2290 |  |  |     - :cite:`Kollar:2007` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2291 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2292 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2293 |  |  |     :param int maxlength: the length of the code returned (defaults to 4) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2294 |  |  |     :param bool zero_pad: pad the end of the return value with 0s to achieve | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2295 |  |  |         a maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2296 |  |  |     :returns: the Phonix value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2297 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2298 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2299 |  |  |     >>> phonix('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2300 |  |  |     'K683' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2301 |  |  |     >>> phonix('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2302 |  |  |     'N400' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2303 |  |  |     >>> phonix('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2304 |  |  |     'S530' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2305 |  |  |     >>> phonix('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2306 |  |  |     'S530' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2307 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2308 |  |  |     # pylint: disable=too-many-branches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2309 |  |  |     def _start_repl(word, src, tar, post=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2310 |  |  |         r"""Replace src with tar at the start of word.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2311 |  |  |         if post: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2312 |  |  |             for i in post: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2313 |  |  |                 if word.startswith(src+i): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2314 |  |  |                     return tar + word[len(src):] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2315 |  |  |         elif word.startswith(src): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2316 |  |  |             return tar + word[len(src):] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2317 |  |  |         return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2318 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2319 |  |  |     def _end_repl(word, src, tar, pre=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2320 |  |  |         r"""Replace src with tar at the end of word.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2321 |  |  |         if pre: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2322 |  |  |             for i in pre: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2323 |  |  |                 if word.endswith(i+src): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2324 |  |  |                     return word[:-len(src)] + tar | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2325 |  |  |         elif word.endswith(src): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2326 |  |  |             return word[:-len(src)] + tar | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2327 |  |  |         return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2328 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _mid_repl(word, src, tar, pre=None, post=None):
                    r"""Replace src with tar in the middle of word.

                    Which part of word is searched depends on the context supplied:

                    - pre and post both given: the whole word is passed to _all_repl.
                    - only post given: the first character is kept as-is and the rest
                      is searched.
                    - only pre given: the last character is kept as-is and the rest
                      is searched.
                    - no context: the first and last characters are kept as-is and
                      only the interior is searched.

                    Words too short to have the protected characters are handled
                    without indexing: an empty word yields an empty result and, with
                    no context, a one-character word is returned unchanged instead of
                    being emitted twice.

                    :param str word: the word to modify
                    :param str src: the substring to look for
                    :param str tar: the replacement for src
                    :param pre: optional iterable of strings that must precede src
                    :param post: optional iterable of strings that must follow src
                    :returns: the word with interior matches replaced
                    :rtype: str
                    """
                    if pre and post:
                        return _all_repl(word, src, tar, pre, post)

                    if post:
                        # word[:1] (not word[0]) so an empty word does not raise.
                        return word[:1] + _all_repl(word[1:], src, tar, pre, post)

                    if pre:
                        # word[-1:] (not word[-1]) so an empty word does not raise.
                        return _all_repl(word[:-1], src, tar, pre, post) + word[-1:]

                    # No context: a word shorter than two characters has no interior,
                    # and word[0] + word[-1] would duplicate a single character.
                    if len(word) < 2:
                        return word
                    return (word[0] + _all_repl(word[1:-1], src, tar, pre, post) +
                            word[-1])
            
                                                                                                            
                            
            
                                    
            
            
                | 2339 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2340 |  |  |     def _all_repl(word, src, tar, pre=None, post=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2341 |  |  |         r"""Replace src with tar anywhere in word.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2342 |  |  |         if pre or post: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2343 |  |  |             if post: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2344 |  |  |                 post = post | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2345 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2346 |  |  |                 post = frozenset(('',)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2347 |  |  |             if pre: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2348 |  |  |                 pre = pre | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2349 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2350 |  |  |                 pre = frozenset(('',)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2351 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2352 |  |  |             for i, j in ((i, j) for i in pre for j in post): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2353 |  |  |                 word = word.replace(i+src+j, i+tar+j) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2354 |  |  |             return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2355 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2356 |  |  |             return word.replace(src, tar) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2357 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                # Upper-case vowel and consonant letters, used below as the
                # optional context arguments of the substitution rules.
                _vow = set('AEIOU')
                _con = set('BCDFGHJKLMNPQRSTVWXYZ')

                # Phonix substitution rules.  Each rule is a tuple of
                #     (replacement helper, source substring, target substring
                #      [, context set [, context set]])
                # NOTE(review): the trailing sets are presumably the pre/post
                # context arguments of the helper named in the rule -- confirm
                # against the helpers' signatures.  The rules are presumably
                # applied in the order listed, so the order (including the
                # entries that occur more than once) is kept exactly as it was.
                _phonix_substitutions = (
                    (_all_repl, 'DG', 'G'),
                    (_all_repl, 'CO', 'KO'),
                    (_all_repl, 'CA', 'KA'),
                    (_all_repl, 'CU', 'KU'),
                    (_all_repl, 'CY', 'SI'),
                    (_all_repl, 'CI', 'SI'),
                    (_all_repl, 'CE', 'SE'),
                    (_start_repl, 'CL', 'KL', _vow),
                    (_all_repl, 'CK', 'K'),
                    (_end_repl, 'GC', 'K'),
                    (_end_repl, 'JC', 'K'),
                    (_start_repl, 'CHR', 'KR', _vow),
                    (_start_repl, 'CR', 'KR', _vow),
                    (_start_repl, 'WR', 'R'),
                    (_all_repl, 'NC', 'NK'),
                    (_all_repl, 'CT', 'KT'),
                    (_all_repl, 'PH', 'F'),
                    (_all_repl, 'AA', 'AR'),
                    (_all_repl, 'SCH', 'SH'),
                    (_all_repl, 'BTL', 'TL'),
                    (_all_repl, 'GHT', 'T'),
                    (_all_repl, 'AUGH', 'ARF'),
                    (_mid_repl, 'LJ', 'LD', _vow, _vow),
                    (_all_repl, 'LOUGH', 'LOW'),
                    (_start_repl, 'Q', 'KW'),
                    (_start_repl, 'KN', 'N'),
                    (_end_repl, 'GN', 'N'),
                    (_all_repl, 'GHN', 'N'),
                    (_end_repl, 'GNE', 'N'),
                    (_all_repl, 'GHNE', 'NE'),
                    (_end_repl, 'GNES', 'NS'),
                    (_start_repl, 'GN', 'N'),
                    (_mid_repl, 'GN', 'N', None, _con),
                    (_end_repl, 'GN', 'N'),
                    (_start_repl, 'PS', 'S'),
                    (_start_repl, 'PT', 'T'),
                    (_start_repl, 'CZ', 'C'),
                    (_mid_repl, 'WZ', 'Z', _vow),
                    (_mid_repl, 'CZ', 'CH'),
                    (_all_repl, 'LZ', 'LSH'),
                    (_all_repl, 'RZ', 'RSH'),
                    (_mid_repl, 'Z', 'S', None, _vow),
                    (_all_repl, 'ZZ', 'TS'),
                    (_mid_repl, 'Z', 'TS', _con),
                    (_all_repl, 'HROUG', 'REW'),
                    (_all_repl, 'OUGH', 'OF'),
                    (_mid_repl, 'Q', 'KW', _vow, _vow),
                    (_mid_repl, 'J', 'Y', _vow, _vow),
                    (_start_repl, 'YJ', 'Y', _vow),
                    (_start_repl, 'GH', 'G'),
                    (_end_repl, 'GH', 'E', _vow),
                    (_start_repl, 'CY', 'S'),
                    (_all_repl, 'NX', 'NKS'),
                    (_start_repl, 'PF', 'F'),
                    (_end_repl, 'DT', 'T'),
                    (_end_repl, 'TL', 'TIL'),
                    (_end_repl, 'DL', 'DIL'),
                    (_all_repl, 'YTH', 'ITH'),
                    (_start_repl, 'TJ', 'CH', _vow),
                    (_start_repl, 'TSJ', 'CH', _vow),
                    (_start_repl, 'TS', 'T', _vow),
                    (_all_repl, 'TCH', 'CH'),
                    (_mid_repl, 'WSK', 'VSKIE', _vow),
                    (_end_repl, 'WSK', 'VSKIE', _vow),
                    (_start_repl, 'MN', 'N', _vow),
                    (_start_repl, 'PN', 'N', _vow),
                    (_mid_repl, 'STL', 'SL', _vow),
                    (_end_repl, 'STL', 'SL', _vow),
                    (_end_repl, 'TNT', 'ENT'),
                    (_end_repl, 'EAUX', 'OH'),
                    (_all_repl, 'EXCI', 'ECS'),
                    (_all_repl, 'X', 'ECS'),
                    (_end_repl, 'NED', 'ND'),
                    (_all_repl, 'JR', 'DR'),
                    (_end_repl, 'EE', 'EA'),
                    (_all_repl, 'ZS', 'S'),
                    (_mid_repl, 'R', 'AH', _vow, _con),
                    (_end_repl, 'R', 'AH', _vow),
                    (_mid_repl, 'HR', 'AH', _vow, _con),
                    (_end_repl, 'HR', 'AH', _vow),
                    (_end_repl, 'HR', 'AH', _vow),
                    (_end_repl, 'RE', 'AR'),
                    (_end_repl, 'R', 'AH', _vow),
                    (_all_repl, 'LLE', 'LE'),
                    (_end_repl, 'LE', 'ILE', _con),
                    (_end_repl, 'LES', 'ILES', _con),
                    (_end_repl, 'E', ''),
                    (_end_repl, 'ES', 'S'),
                    (_end_repl, 'SS', 'AS', _vow),
                    (_end_repl, 'MB', 'M', _vow),
                    (_all_repl, 'MPTS', 'MPS'),
                    (_all_repl, 'MPS', 'MS'),
                    (_all_repl, 'MPT', 'MT'),
                )

                # Maps the code point of each upper-case letter A-Z to a
                # one-character digit string.  Keys are ints (from ord());
                # presumably consumed by str.translate -- confirm at the
                # call site.
                _phonix_translation = {
                    ord(letter): digit
                    for letter, digit in zip('ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                             '01230720022455012683070808')
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 2459 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2460 |  |  |     sdx = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2461 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2462 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2463 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2464 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2465 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2466 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2467 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2468 |  |  |     if word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2469 |  |  |         for trans in _phonix_substitutions: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2470 |  |  |             word = trans[0](word, *trans[1:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2471 |  |  |         if word[0] in {'A', 'E', 'I', 'O', 'U', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2472 |  |  |             sdx = 'v' + word[1:].translate(_phonix_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2473 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2474 |  |  |             sdx = word[0] + word[1:].translate(_phonix_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2475 |  |  |         sdx = _delete_consecutive_repeats(sdx) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2476 |  |  |         sdx = sdx.replace('0', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2477 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2478 |  |  |     # Clamp maxlength to [4, 64] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2479 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2480 |  |  |         maxlength = min(max(4, maxlength), 64) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2481 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2482 |  |  |         maxlength = 64 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2483 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2484 |  |  |     if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2485 |  |  |         sdx += '0' * maxlength | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2486 |  |  |     if not sdx: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2487 |  |  |         sdx = '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2488 |  |  |     return sdx[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2489 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2490 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2491 |  |  | def sfinxbis(word, maxlength=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2492 |  |  |     """Return the SfinxBis code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2493 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2494 |  |  |     SfinxBis is a Soundex-like algorithm defined in :cite:`Axelsson:2009`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2495 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2496 |  |  |     This implementation follows the reference implementation: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2497 |  |  |     :cite:`Sjoo:2009`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2498 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2499 |  |  |     SfinxBis is intended chiefly for Swedish names. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2500 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2501 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2502 |  |  |     :param int maxlength: the length of the code returned (defaults to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2503 |  |  |         unlimited) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2504 |  |  |     :returns: the SfinxBis value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2505 |  |  |     :rtype: tuple | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2506 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2507 |  |  |     >>> sfinxbis('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2508 |  |  |     ('K68376',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2509 |  |  |     >>> sfinxbis('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2510 |  |  |     ('N4',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2511 |  |  |     >>> sfinxbis('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2512 |  |  |     ('S53',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2513 |  |  |     >>> sfinxbis('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2514 |  |  |     ('S53',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2515 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2516 |  |  |     >>> sfinxbis('Johansson') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2517 |  |  |     ('J585',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2518 |  |  |     >>> sfinxbis('Sjöberg') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2519 |  |  |     ('#162',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2520 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2521 |  |  |     adelstitler = (' DE LA ', ' DE LAS ', ' DE LOS ', ' VAN DE ', ' VAN DEN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2522 |  |  |                    ' VAN DER ', ' VON DEM ', ' VON DER ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2523 |  |  |                    ' AF ', ' AV ', ' DA ', ' DE ', ' DEL ', ' DEN ', ' DES ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2524 |  |  |                    ' DI ', ' DO ', ' DON ', ' DOS ', ' DU ', ' E ', ' IN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2525 |  |  |                    ' LA ', ' LE ', ' MAC ', ' MC ', ' VAN ', ' VON ', ' Y ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2526 |  |  |                    ' S:T ') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2527 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2528 |  |  |     _harde_vokaler = {'A', 'O', 'U', 'Å'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2529 |  |  |     _mjuka_vokaler = {'E', 'I', 'Y', 'Ä', 'Ö'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2530 |  |  |     _konsonanter = {'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2531 |  |  |                     'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2532 |  |  |     _alfabet = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2533 |  |  |                 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2534 |  |  |                 'Y', 'Z', 'Ä', 'Å', 'Ö'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2535 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2536 |  |  |     _sfinxbis_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2537 |  |  |                                       'BCDFGHJKLMNPQRSTVZAOUÅEIYÄÖ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2538 |  |  |                                      '123729224551268378999999999')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2539 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2540 |  |  |     _sfinxbis_substitutions = dict(zip((ord(_) for _ in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2541 |  |  |                                         'WZÀÁÂÃÆÇÈÉÊËÌÍÎÏÑÒÓÔÕØÙÚÛÜÝ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2542 |  |  |                                        'VSAAAAÄCEEEEIIIINOOOOÖUUUYY')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2543 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2544 |  |  |     def _foersvensker(ordet): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2545 |  |  |         """Return the Swedish-ized form of the word.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2546 |  |  |         ordet = ordet.replace('STIERN', 'STJÄRN') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2547 |  |  |         ordet = ordet.replace('HIE', 'HJ') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2548 |  |  |         ordet = ordet.replace('SIÖ', 'SJÖ') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2549 |  |  |         ordet = ordet.replace('SCH', 'SH') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2550 |  |  |         ordet = ordet.replace('QU', 'KV') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2551 |  |  |         ordet = ordet.replace('IO', 'JO') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2552 |  |  |         ordet = ordet.replace('PH', 'F') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2553 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2554 |  |  |         for i in _harde_vokaler: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2555 |  |  |             ordet = ordet.replace(i+'Ü', i+'J') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2556 |  |  |             ordet = ordet.replace(i+'Y', i+'J') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2557 |  |  |             ordet = ordet.replace(i+'I', i+'J') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2558 |  |  |         for i in _mjuka_vokaler: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2559 |  |  |             ordet = ordet.replace(i+'Ü', i+'J') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2560 |  |  |             ordet = ordet.replace(i+'Y', i+'J') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2561 |  |  |             ordet = ordet.replace(i+'I', i+'J') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2562 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2563 |  |  |         if 'H' in ordet: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2564 |  |  |             for i in _konsonanter: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2565 |  |  |                 ordet = ordet.replace('H'+i, i) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2566 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2567 |  |  |         ordet = ordet.translate(_sfinxbis_substitutions) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2568 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2569 |  |  |         ordet = ordet.replace('Ð', 'ETH') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2570 |  |  |         ordet = ordet.replace('Þ', 'TH') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2571 |  |  |         ordet = ordet.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2572 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2573 |  |  |         return ordet | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2574 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _koda_foersta_ljudet(ordet):
                    """Return the word with its initial sound replaced by a code symbol.

                    The rules are tried in a fixed order and only the first matching
                    rule is applied; a word matching no rule is returned unchanged.
                    All tests use slices, so an empty or very short word never raises.

                    Rules, in priority order ("soft"/"hard" vowel means membership in
                    ``_mjuka_vokaler``/``_harde_vokaler``; "consonant" means membership
                    in ``_konsonanter``):

                    * leading soft or hard vowel        -> ``$`` replaces the vowel
                    * ``DJ``, ``GJ``, ``HJ``, ``LJ``    -> ``J`` replaces both letters
                    * ``G`` + soft vowel                -> ``J`` replaces the ``G``
                    * ``Q``                             -> ``K``
                    * ``CH`` + any vowel                -> ``#`` replaces ``CH``
                    * ``C`` + hard vowel or consonant   -> ``K`` replaces the ``C``
                    * ``X``                             -> ``S``
                    * ``C`` + soft vowel                -> ``S`` replaces the ``C``
                    * ``SKJ``, ``STJ``, ``SCH``         -> ``#`` replaces all three
                    * ``SH``, ``KJ``, ``TJ``, ``SJ``    -> ``#`` replaces both letters
                    * ``SK`` + soft vowel               -> ``#`` replaces ``SK``
                    * ``K`` + soft vowel                -> ``#`` replaces the ``K``

                    :param str ordet: the word to encode
                    :returns: the word with its first sound coded
                    :rtype: str
                    """
                    # Slices (not indexing) yield '' past the end of the word, and ''
                    # matches none of the literals or vowel/consonant collections.
                    c1, c2, c3 = ordet[0:1], ordet[1:2], ordet[2:3]
                    head2, head3 = ordet[0:2], ordet[0:3]

                    # Remainders after dropping one, two or three leading letters.
                    after1, after2, after3 = ordet[1:], ordet[2:], ordet[3:]

                    if c1 in _mjuka_vokaler or c1 in _harde_vokaler:
                        return '$' + after1
                    if head2 in ('DJ', 'GJ', 'HJ', 'LJ'):
                        return 'J' + after2
                    if c1 == 'G' and c2 in _mjuka_vokaler:
                        return 'J' + after1
                    if c1 == 'Q':
                        return 'K' + after1
                    if head2 == 'CH' and (c3 in _mjuka_vokaler or
                                          c3 in _harde_vokaler):
                        return '#' + after2
                    if c1 == 'C' and c2 in _harde_vokaler:
                        return 'K' + after1
                    if c1 == 'C' and c2 in _konsonanter:
                        return 'K' + after1
                    if c1 == 'X':
                        return 'S' + after1
                    if c1 == 'C' and c2 in _mjuka_vokaler:
                        return 'S' + after1
                    # Three-letter clusters must be tested before the two-letter
                    # ones below: 'SCH' would otherwise never be reached as a unit,
                    # and 'SKJ' must win over the later 'SK' + soft vowel rule.
                    if head3 in ('SKJ', 'STJ', 'SCH'):
                        return '#' + after3
                    if head2 in ('SH', 'KJ', 'TJ', 'SJ'):
                        return '#' + after2
                    if head2 == 'SK' and c3 in _mjuka_vokaler:
                        return '#' + after2
                    if c1 == 'K' and c2 in _mjuka_vokaler:
                        return '#' + after1
                    return ordet
            
                                                                                                            
                            
            
                                    
            
            
                | 2605 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2606 |  |  |     # Steg 1, Versaler | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2607 |  |  |     word = normalize('NFC', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2608 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2609 |  |  |     word = word.replace('-', ' ') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2610 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2611 |  |  |     # Steg 2, Ta bort adelsprefix | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2612 |  |  |     for adelstitel in adelstitler: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2613 |  |  |         while adelstitel in word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2614 |  |  |             word = word.replace(adelstitel, ' ') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2615 |  |  |         if word.startswith(adelstitel[1:]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2616 |  |  |             word = word[len(adelstitel)-1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2617 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2618 |  |  |     # Split word into tokens | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2619 |  |  |     ordlista = word.split() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2620 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2621 |  |  |     # Steg 3, Ta bort dubbelteckning i början på namnet | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2622 |  |  |     ordlista = [_delete_consecutive_repeats(ordet) for ordet in ordlista] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2623 |  |  |     if not ordlista: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2624 |  |  |         return ('',) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2625 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2626 |  |  |     # Steg 4, Försvenskning | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2627 |  |  |     ordlista = [_foersvensker(ordet) for ordet in ordlista] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2628 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2629 |  |  |     # Steg 5, Ta bort alla tecken som inte är A-Ö (65-90,196,197,214) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2630 |  |  |     ordlista = [''.join(c for c in ordet if c in _alfabet) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2631 |  |  |                 for ordet in ordlista] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2632 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2633 |  |  |     # Steg 6, Koda första ljudet | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2634 |  |  |     ordlista = [_koda_foersta_ljudet(ordet) for ordet in ordlista] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2635 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2636 |  |  |     # Steg 7, Dela upp namnet i två delar | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2637 |  |  |     rest = [ordet[1:] for ordet in ordlista] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2638 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2639 |  |  |     # Steg 8, Utför fonetisk transformation i resten | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2640 |  |  |     rest = [ordet.replace('DT', 'T') for ordet in rest] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2641 |  |  |     rest = [ordet.replace('X', 'KS') for ordet in rest] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2642 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2643 |  |  |     # Steg 9, Koda resten till en sifferkod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2644 |  |  |     for vokal in _mjuka_vokaler: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2645 |  |  |         rest = [ordet.replace('C'+vokal, '8'+vokal) for ordet in rest] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2646 |  |  |     rest = [ordet.translate(_sfinxbis_translation) for ordet in rest] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2647 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2648 |  |  |     # Steg 10, Ta bort intilliggande dubbletter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2649 |  |  |     rest = [_delete_consecutive_repeats(ordet) for ordet in rest] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2650 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2651 |  |  |     # Steg 11, Ta bort alla "9" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2652 |  |  |     rest = [ordet.replace('9', '') for ordet in rest] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2653 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2654 |  |  |     # Steg 12, Sätt ihop delarna igen | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2655 |  |  |     ordlista = [''.join(ordet) for ordet in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2656 |  |  |                 zip((_[0:1] for _ in ordlista), rest)] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2657 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2658 |  |  |     # truncate, if maxlength is set | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2659 |  |  |     if maxlength and maxlength < _INFINITY: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2660 |  |  |         ordlista = [ordet[:maxlength] for ordet in ordlista] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2661 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2662 |  |  |     return tuple(ordlista) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2663 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2664 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2665 |  |  | def phonet(word, mode=1, lang='de', trace=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2666 |  |  |     """Return the phonet code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2667 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2668 |  |  |     phonet ("Hannoveraner Phonetik") was developed by Jörg Michael and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2669 |  |  |     documented in :cite:`Michael:1999`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2670 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2671 |  |  |     This is a port of Jesper Zedlitz's code, which is licensed LGPL | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2672 |  |  |     :cite:`Zedlitz:2015`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2673 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2674 |  |  |     That is, in turn, based on Michael's C code, which is also licensed LGPL | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2675 |  |  |     :cite:`Michael:2007`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2676 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2677 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2678 |  |  |     :param int mode: the phonet variant to employ (1 or 2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2679 |  |  |     :param str lang: 'de' (default) for German | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2680 |  |  |             'none' for no language | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2681 |  |  |     :param bool trace: prints debugging info if True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2682 |  |  |     :returns: the phonet value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2683 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2684 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2685 |  |  |     >>> phonet('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2686 |  |  |     'KRISTOFA' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2687 |  |  |     >>> phonet('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2688 |  |  |     'NIAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2689 |  |  |     >>> phonet('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2690 |  |  |     'SMIT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2691 |  |  |     >>> phonet('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2692 |  |  |     'SHMIT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2693 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2694 |  |  |     >>> phonet('Christopher', mode=2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2695 |  |  |     'KRIZTUFA' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2696 |  |  |     >>> phonet('Niall', mode=2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2697 |  |  |     'NIAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2698 |  |  |     >>> phonet('Smith', mode=2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2699 |  |  |     'ZNIT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2700 |  |  |     >>> phonet('Schmidt', mode=2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2701 |  |  |     'ZNIT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2702 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2703 |  |  |     >>> phonet('Christopher', lang='none') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2704 |  |  |     'CHRISTOPHER' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2705 |  |  |     >>> phonet('Niall', lang='none') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2706 |  |  |     'NIAL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2707 |  |  |     >>> phonet('Smith', lang='none') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2708 |  |  |     'SMITH' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2709 |  |  |     >>> phonet('Schmidt', lang='none') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2710 |  |  |     'SCHMIDT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2711 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2712 |  |  |     # pylint: disable=too-many-branches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2713 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2714 |  |  |     _phonet_rules_no_lang = (  # separator chars | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2715 |  |  |         '´', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2716 |  |  |         '"', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2717 |  |  |         '`$', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2718 |  |  |         '\'', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2719 |  |  |         ',', ',', ',', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2720 |  |  |         ';', ',', ',', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2721 |  |  |         '-', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2722 |  |  |         ' ', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2723 |  |  |         '.', '.', '.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2724 |  |  |         ':', '.', '.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2725 |  |  |         # German umlauts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2726 |  |  |         'Ä', 'AE', 'AE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2727 |  |  |         'Ö', 'OE', 'OE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2728 |  |  |         'Ü', 'UE', 'UE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2729 |  |  |         'ß', 'S', 'S', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2730 |  |  |         # international umlauts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2731 |  |  |         'À', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2732 |  |  |         'Á', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2733 |  |  |         'Â', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2734 |  |  |         'Ã', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2735 |  |  |         'Å', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2736 |  |  |         'Æ', 'AE', 'AE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2737 |  |  |         'Ç', 'C', 'C', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2738 |  |  |         'Ð', 'DJ', 'DJ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2739 |  |  |         'È', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2740 |  |  |         'É', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2741 |  |  |         'Ê', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2742 |  |  |         'Ë', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2743 |  |  |         'Ì', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2744 |  |  |         'Í', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2745 |  |  |         'Î', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2746 |  |  |         'Ï', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2747 |  |  |         'Ñ', 'NH', 'NH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2748 |  |  |         'Ò', 'O', 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2749 |  |  |         'Ó', 'O', 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2750 |  |  |         'Ô', 'O', 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2751 |  |  |         'Õ', 'O', 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2752 |  |  |         'Œ', 'OE', 'OE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2753 |  |  |         'Ø', 'OE', 'OE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2754 |  |  |         'Š', 'SH', 'SH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2755 |  |  |         'Þ', 'TH', 'TH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2756 |  |  |         'Ù', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2757 |  |  |         'Ú', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2758 |  |  |         'Û', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2759 |  |  |         'Ý', 'Y', 'Y', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2760 |  |  |         'Ÿ', 'Y', 'Y', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2761 |  |  |         # 'normal' letters (A-Z) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2762 |  |  |         'MC^', 'MAC', 'MAC', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2763 |  |  |         'MC^', 'MAC', 'MAC', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2764 |  |  |         'M´^', 'MAC', 'MAC', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2765 |  |  |         'M\'^', 'MAC', 'MAC', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2766 |  |  |         'O´^', 'O', 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2767 |  |  |         'O\'^', 'O', 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2768 |  |  |         'VAN DEN ^', 'VANDEN', 'VANDEN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2769 |  |  |         None, None, None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2770 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2771 |  |  |     _phonet_rules_german = (  # separator chars | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2772 |  |  |         '´', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2773 |  |  |         '"', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2774 |  |  |         '`$', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2775 |  |  |         '\'', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2776 |  |  |         ',', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2777 |  |  |         ';', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2778 |  |  |         '-', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2779 |  |  |         ' ', ' ', ' ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2780 |  |  |         '.', '.', '.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2781 |  |  |         ':', '.', '.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2782 |  |  |         # German umlauts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2783 |  |  |         'ÄE', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2784 |  |  |         'ÄU<', 'EU', 'EU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2785 |  |  |         'ÄV(AEOU)-<', 'EW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2786 |  |  |         'Ä$', 'Ä', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2787 |  |  |         'Ä<', None, 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2788 |  |  |         'Ä', 'E', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2789 |  |  |         'ÖE', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2790 |  |  |         'ÖU', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2791 |  |  |         'ÖVER--<', 'ÖW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2792 |  |  |         'ÖV(AOU)-', 'ÖW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2793 |  |  |         'ÜBEL(GNRW)-^^', 'ÜBL ', 'IBL ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2794 |  |  |         'ÜBER^^', 'ÜBA', 'IBA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2795 |  |  |         'ÜE', 'Ü', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2796 |  |  |         'ÜVER--<', 'ÜW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2797 |  |  |         'ÜV(AOU)-', 'ÜW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2798 |  |  |         'Ü', None, 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2799 |  |  |         'ßCH<', None, 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2800 |  |  |         'ß<', 'S', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2801 |  |  |         # international umlauts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2802 |  |  |         'À<', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2803 |  |  |         'Á<', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2804 |  |  |         'Â<', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2805 |  |  |         'Ã<', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2806 |  |  |         'Å<', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2807 |  |  |         'ÆER-', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2808 |  |  |         'ÆU<', 'EU', 'EU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2809 |  |  |         'ÆV(AEOU)-<', 'EW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2810 |  |  |         'Æ$', 'Ä', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2811 |  |  |         'Æ<', None, 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2812 |  |  |         'Æ', 'E', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2813 |  |  |         'Ç', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2814 |  |  |         'ÐÐ-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2815 |  |  |         'Ð', 'DI', 'TI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2816 |  |  |         'È<', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2817 |  |  |         'É<', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2818 |  |  |         'Ê<', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2819 |  |  |         'Ë', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2820 |  |  |         'Ì<', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2821 |  |  |         'Í<', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2822 |  |  |         'Î<', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2823 |  |  |         'Ï', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2824 |  |  |         'ÑÑ-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2825 |  |  |         'Ñ', 'NI', 'NI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2826 |  |  |         'Ò<', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2827 |  |  |         'Ó<', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2828 |  |  |         'Ô<', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2829 |  |  |         'Õ<', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2830 |  |  |         'Œ<', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2831 |  |  |         'Ø(IJY)-<', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2832 |  |  |         'Ø<', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2833 |  |  |         'Š', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2834 |  |  |         'Þ', 'T', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2835 |  |  |         'Ù<', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2836 |  |  |         'Ú<', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2837 |  |  |         'Û<', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2838 |  |  |         'Ý<', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2839 |  |  |         'Ÿ<', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2840 |  |  |         # 'normal' letters (A-Z) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2841 |  |  |         'ABELLE$', 'ABL', 'ABL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2842 |  |  |         'ABELL$', 'ABL', 'ABL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2843 |  |  |         'ABIENNE$', 'ABIN', 'ABIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2844 |  |  |         'ACHME---^', 'ACH', 'AK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2845 |  |  |         'ACEY$', 'AZI', 'AZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2846 |  |  |         'ADV', 'ATW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2847 |  |  |         'AEGL-', 'EK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2848 |  |  |         'AEU<', 'EU', 'EU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2849 |  |  |         'AE2', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2850 |  |  |         'AFTRAUBEN------', 'AFT ', 'AFT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2851 |  |  |         'AGL-1', 'AK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2852 |  |  |         'AGNI-^', 'AKN', 'AKN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2853 |  |  |         'AGNIE-', 'ANI', 'ANI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2854 |  |  |         'AGN(AEOU)-$', 'ANI', 'ANI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2855 |  |  |         'AH(AIOÖUÜY)-', 'AH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2856 |  |  |         'AIA2', 'AIA', 'AIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2857 |  |  |         'AIE$', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2858 |  |  |         'AILL(EOU)-', 'ALI', 'ALI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2859 |  |  |         'AINE$', 'EN', 'EN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2860 |  |  |         'AIRE$', 'ER', 'ER', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2861 |  |  |         'AIR-', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2862 |  |  |         'AISE$', 'ES', 'EZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2863 |  |  |         'AISSANCE$', 'ESANS', 'EZANZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2864 |  |  |         'AISSE$', 'ES', 'EZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2865 |  |  |         'AIX$', 'EX', 'EX', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2866 |  |  |         'AJ(AÄEÈÉÊIOÖUÜ)--', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2867 |  |  |         'AKTIE', 'AXIE', 'AXIE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2868 |  |  |         'AKTUEL', 'AKTUEL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2869 |  |  |         'ALOI^', 'ALOI', 'ALUI',  # Don't merge these rules | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2870 |  |  |         'ALOY^', 'ALOI', 'ALUI',  # needed by 'check_rules' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2871 |  |  |         'AMATEU(RS)-', 'AMATÖ', 'ANATÖ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2872 |  |  |         'ANCH(OEI)-', 'ANSH', 'ANZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2873 |  |  |         'ANDERGEGANG----', 'ANDA GE', 'ANTA KE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2874 |  |  |         'ANDERGEHE----', 'ANDA ', 'ANTA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2875 |  |  |         'ANDERGESETZ----', 'ANDA GE', 'ANTA KE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2876 |  |  |         'ANDERGING----', 'ANDA ', 'ANTA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2877 |  |  |         'ANDERSETZ(ET)-----', 'ANDA ', 'ANTA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2878 |  |  |         'ANDERZUGEHE----', 'ANDA ZU ', 'ANTA ZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2879 |  |  |         'ANDERZUSETZE-----', 'ANDA ZU ', 'ANTA ZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2880 |  |  |         'ANER(BKO)---^^', 'AN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2881 |  |  |         'ANHAND---^$', 'AN H', 'AN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2882 |  |  |         'ANH(AÄEIOÖUÜY)--^^', 'AN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2883 |  |  |         'ANIELLE$', 'ANIEL', 'ANIL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2884 |  |  |         'ANIEL', 'ANIEL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2885 |  |  |         'ANSTELLE----^$', 'AN ST', 'AN ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2886 |  |  |         'ANTI^^', 'ANTI', 'ANTI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2887 |  |  |         'ANVER^^', 'ANFA', 'ANFA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2888 |  |  |         'ATIA$', 'ATIA', 'ATIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2889 |  |  |         'ATIA(NS)--', 'ATI', 'ATI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2890 |  |  |         'ATI(AÄOÖUÜ)-', 'AZI', 'AZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2891 |  |  |         'AUAU--', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2892 |  |  |         'AUERE$', 'AUERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2893 |  |  |         'AUERE(NS)-$', 'AUERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2894 |  |  |         'AUERE(AIOUY)--', 'AUER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2895 |  |  |         'AUER(AÄIOÖUÜY)-', 'AUER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2896 |  |  |         'AUER<', 'AUA', 'AUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2897 |  |  |         'AUF^^', 'AUF', 'AUF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2898 |  |  |         'AULT$', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2899 |  |  |         'AUR(BCDFGKLMNQSTVWZ)-', 'AUA', 'AUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2900 |  |  |         'AUR$', 'AUA', 'AUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2901 |  |  |         'AUSSE$', 'OS', 'UZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2902 |  |  |         'AUS(ST)-^', 'AUS', 'AUS', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2903 |  |  |         'AUS^^', 'AUS', 'AUS', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2904 |  |  |         'AUTOFAHR----', 'AUTO ', 'AUTU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2905 |  |  |         'AUTO^^', 'AUTO', 'AUTU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2906 |  |  |         'AUX(IY)-', 'AUX', 'AUX', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2907 |  |  |         'AUX', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2908 |  |  |         'AU', 'AU', 'AU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2909 |  |  |         'AVER--<', 'AW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2910 |  |  |         'AVIER$', 'AWIE', 'AFIE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2911 |  |  |         'AV(EÈÉÊI)-^', 'AW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2912 |  |  |         'AV(AOU)-', 'AW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2913 |  |  |         'AYRE$', 'EIRE', 'EIRE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2914 |  |  |         'AYRE(NS)-$', 'EIRE', 'EIRE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2915 |  |  |         'AYRE(AIOUY)--', 'EIR', 'EIR', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2916 |  |  |         'AYR(AÄIOÖUÜY)-', 'EIR', 'EIR', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2917 |  |  |         'AYR<', 'EIA', 'EIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2918 |  |  |         'AYER--<', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2919 |  |  |         'AY(AÄEIOÖUÜY)--', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2920 |  |  |         'AË', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2921 |  |  |         'A(IJY)<', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2922 |  |  |         'BABY^$', 'BEBI', 'BEBI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2923 |  |  |         'BAB(IY)^', 'BEBI', 'BEBI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2924 |  |  |         'BEAU^$', 'BO', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2925 |  |  |         'BEA(BCMNRU)-^', 'BEA', 'BEA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2926 |  |  |         'BEAT(AEIMORU)-^', 'BEAT', 'BEAT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2927 |  |  |         'BEE$', 'BI', 'BI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2928 |  |  |         'BEIGE^$', 'BESH', 'BEZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2929 |  |  |         'BENOIT--', 'BENO', 'BENU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2930 |  |  |         'BER(DT)-', 'BER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2931 |  |  |         'BERN(DT)-', 'BERN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2932 |  |  |         'BE(LMNRST)-^', 'BE', 'BE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2933 |  |  |         'BETTE$', 'BET', 'BET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2934 |  |  |         'BEVOR^$', 'BEFOR', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2935 |  |  |         'BIC$', 'BIZ', 'BIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2936 |  |  |         'BOWL(EI)-', 'BOL', 'BUL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2937 |  |  |         'BP(AÄEÈÉÊIÌÍÎOÖRUÜY)-', 'B', 'B', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2938 |  |  |         'BRINGEND-----^', 'BRI', 'BRI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2939 |  |  |         'BRINGEND-----', ' BRI', ' BRI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2940 |  |  |         'BROW(NS)-', 'BRAU', 'BRAU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2941 |  |  |         'BUDGET7', 'BÜGE', 'BIKE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2942 |  |  |         'BUFFET7', 'BÜFE', 'BIFE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2943 |  |  |         'BYLLE$', 'BILE', 'BILE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2944 |  |  |         'BYLL$', 'BIL', 'BIL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2945 |  |  |         'BYPA--^', 'BEI', 'BEI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2946 |  |  |         'BYTE<', 'BEIT', 'BEIT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2947 |  |  |         'BY9^', 'BÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2948 |  |  |         'B(SßZ)$', 'BS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2949 |  |  |         'CACH(EI)-^', 'KESH', 'KEZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2950 |  |  |         'CAE--', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2951 |  |  |         'CA(IY)$', 'ZEI', 'ZEI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2952 |  |  |         'CE(EIJUY)--', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2953 |  |  |         'CENT<', 'ZENT', 'ZENT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2954 |  |  |         'CERST(EI)----^', 'KE', 'KE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2955 |  |  |         'CER$', 'ZA', 'ZA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2956 |  |  |         'CE3', 'ZE', 'ZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2957 |  |  |         'CH\'S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2958 |  |  |         'CH´S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2959 |  |  |         'CHAO(ST)-', 'KAO', 'KAU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2960 |  |  |         'CHAMPIO-^', 'SHEMPI', 'ZENBI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2961 |  |  |         'CHAR(AI)-^', 'KAR', 'KAR', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2962 |  |  |         'CHAU(CDFSVWXZ)-', 'SHO', 'ZU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2963 |  |  |         'CHÄ(CF)-', 'SHE', 'ZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2964 |  |  |         'CHE(CF)-', 'SHE', 'ZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2965 |  |  |         'CHEM-^', 'KE', 'KE',  # or: 'CHE', 'KE' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2966 |  |  |         'CHEQUE<', 'SHEK', 'ZEK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2967 |  |  |         'CHI(CFGPVW)-', 'SHI', 'ZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2968 |  |  |         'CH(AEUY)-<^', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2969 |  |  |         'CHK-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2970 |  |  |         'CHO(CKPS)-^', 'SHO', 'ZU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2971 |  |  |         'CHRIS-', 'KRI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2972 |  |  |         'CHRO-', 'KR', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2973 |  |  |         'CH(LOR)-<^', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2974 |  |  |         'CHST-', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2975 |  |  |         'CH(SßXZ)3', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2976 |  |  |         'CHTNI-3', 'CHN', 'KN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2977 |  |  |         'CH^', 'K', 'K',  # or: 'CH', 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2978 |  |  |         'CH', 'CH', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2979 |  |  |         'CIC$', 'ZIZ', 'ZIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2980 |  |  |         'CIENCEFICT----', 'EIENS ', 'EIENZ ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2981 |  |  |         'CIENCE$', 'EIENS', 'EIENZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2982 |  |  |         'CIER$', 'ZIE', 'ZIE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2983 |  |  |         'CYB-^', 'ZEI', 'ZEI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2984 |  |  |         'CY9^', 'ZÜ', 'ZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2985 |  |  |         'C(IJY)-<3', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2986 |  |  |         'CLOWN-', 'KLAU', 'KLAU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2987 |  |  |         'CCH', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2988 |  |  |         'CCE-', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2989 |  |  |         'C(CK)-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2990 |  |  |         'CLAUDET---', 'KLO', 'KLU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2991 |  |  |         'CLAUDINE^$', 'KLODIN', 'KLUTIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2992 |  |  |         'COACH', 'KOSH', 'KUZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2993 |  |  |         'COLE$', 'KOL', 'KUL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2994 |  |  |         'COUCH', 'KAUSH', 'KAUZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2995 |  |  |         'COW', 'KAU', 'KAU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2996 |  |  |         'CQUES$', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2997 |  |  |         'CQUE', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2998 |  |  |         'CRASH--9', 'KRE', 'KRE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2999 |  |  |         'CREAT-^', 'KREA', 'KREA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3000 |  |  |         'CST', 'XT', 'XT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3001 |  |  |         'CS<^', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3002 |  |  |         'C(SßX)', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3003 |  |  |         'CT\'S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3004 |  |  |         'CT(SßXZ)', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3005 |  |  |         'CZ<', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3006 |  |  |         'C(ÈÉÊÌÍÎÝ)3', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3007 |  |  |         'C.^', 'C.', 'C.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3008 |  |  |         'CÄ-', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3009 |  |  |         'CÜ$', 'ZÜ', 'ZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3010 |  |  |         'C\'S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3011 |  |  |         'C<', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3012 |  |  |         'DAHER^$', 'DAHER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3013 |  |  |         'DARAUFFOLGE-----', 'DARAUF ', 'TARAUF ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3014 |  |  |         'DAVO(NR)-^$', 'DAFO', 'TAFU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3015 |  |  |         'DD(SZ)--<', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3016 |  |  |         'DD9', 'D', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3017 |  |  |         'DEPOT7', 'DEPO', 'TEBU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3018 |  |  |         'DESIGN', 'DISEIN', 'TIZEIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3019 |  |  |         'DE(LMNRST)-3^', 'DE', 'TE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3020 |  |  |         'DETTE$', 'DET', 'TET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3021 |  |  |         'DH$', 'T', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3022 |  |  |         'DIC$', 'DIZ', 'TIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3023 |  |  |         'DIDR-^', 'DIT', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3024 |  |  |         'DIEDR-^', 'DIT', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3025 |  |  |         'DJ(AEIOU)-^', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3026 |  |  |         'DMITR-^', 'DIMIT', 'TINIT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3027 |  |  |         'DRY9^', 'DRÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3028 |  |  |         'DT-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3029 |  |  |         'DUIS-^', 'DÜ', 'TI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3030 |  |  |         'DURCH^^', 'DURCH', 'TURK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3031 |  |  |         'DVA$', 'TWA', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3032 |  |  |         'DY9^', 'DÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3033 |  |  |         'DYS$', 'DIS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3034 |  |  |         'DS(CH)--<', 'T', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3035 |  |  |         'DST', 'ZT', 'ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3036 |  |  |         'DZS(CH)--', 'T', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3037 |  |  |         'D(SßZ)', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3038 |  |  |         'D(AÄEIOÖRUÜY)-', 'D', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3039 |  |  |         'D(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'D', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3040 |  |  |         'D\'H^', 'D', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3041 |  |  |         'D´H^', 'D', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3042 |  |  |         'D`H^', 'D', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3043 |  |  |         'D\'S3$', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3044 |  |  |         'D´S3$', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3045 |  |  |         'D^', 'D', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3046 |  |  |         'D', 'T', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3047 |  |  |         'EAULT$', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3048 |  |  |         'EAUX$', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3049 |  |  |         'EAU', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3050 |  |  |         'EAV', 'IW', 'IF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3051 |  |  |         'EAS3$', 'EAS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3052 |  |  |         'EA(AÄEIOÖÜY)-3', 'EA', 'EA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3053 |  |  |         'EA3$', 'EA', 'EA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3054 |  |  |         'EA3', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3055 |  |  |         'EBENSO^$', 'EBNSO', 'EBNZU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3056 |  |  |         'EBENSO^^', 'EBNSO ', 'EBNZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3057 |  |  |         'EBEN^^', 'EBN', 'EBN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3058 |  |  |         'EE9', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3059 |  |  |         'EGL-1', 'EK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3060 |  |  |         'EHE(IUY)--1', 'EH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3061 |  |  |         'EHUNG---1', 'E', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3062 |  |  |         'EH(AÄIOÖUÜY)-1', 'EH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3063 |  |  |         'EIEI--', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3064 |  |  |         'EIERE^$', 'EIERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3065 |  |  |         'EIERE$', 'EIERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3066 |  |  |         'EIERE(NS)-$', 'EIERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3067 |  |  |         'EIERE(AIOUY)--', 'EIER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3068 |  |  |         'EIER(AÄIOÖUÜY)-', 'EIER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3069 |  |  |         'EIER<', 'EIA', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3070 |  |  |         'EIGL-1', 'EIK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3071 |  |  |         'EIGH$', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3072 |  |  |         'EIH--', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3073 |  |  |         'EILLE$', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3074 |  |  |         'EIR(BCDFGKLMNQSTVWZ)-', 'EIA', 'EIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3075 |  |  |         'EIR$', 'EIA', 'EIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3076 |  |  |         'EITRAUBEN------', 'EIT ', 'EIT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3077 |  |  |         'EI', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3078 |  |  |         'EJ$', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3079 |  |  |         'ELIZ^', 'ELIS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3080 |  |  |         'ELZ^', 'ELS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3081 |  |  |         'EL-^', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3082 |  |  |         'ELANG----1', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3083 |  |  |         'EL(DKL)--1', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3084 |  |  |         'EL(MNT)--1$', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3085 |  |  |         'ELYNE$', 'ELINE', 'ELINE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3086 |  |  |         'ELYN$', 'ELIN', 'ELIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3087 |  |  |         'EL(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'EL', 'EL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3088 |  |  |         'EL-1', 'L', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3089 |  |  |         'EM-^', None, 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3090 |  |  |         'EM(DFKMPQT)--1', None, 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3091 |  |  |         'EM(AÄEÈÉÊIÌÍÎOÖUÜY)--1', None, 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3092 |  |  |         'EM-1', None, 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3093 |  |  |         'ENGAG-^', 'ANGA', 'ANKA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3094 |  |  |         'EN-^', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3095 |  |  |         'ENTUEL', 'ENTUEL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3096 |  |  |         'EN(CDGKQSTZ)--1', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3097 |  |  |         'EN(AÄEÈÉÊIÌÍÎNOÖUÜY)-1', 'EN', 'EN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3098 |  |  |         'EN-1', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3099 |  |  |         'ERH(AÄEIOÖUÜ)-^', 'ERH', 'ER', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3100 |  |  |         'ER-^', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3101 |  |  |         'ERREGEND-----', ' ER', ' ER', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3102 |  |  |         'ERT1$', 'AT', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3103 |  |  |         'ER(DGLKMNRQTZß)-1', 'ER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3104 |  |  |         'ER(AÄEÈÉÊIÌÍÎOÖUÜY)-1', 'ER', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3105 |  |  |         'ER1$', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3106 |  |  |         'ER<1', 'A', 'A', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3107 |  |  |         'ETAT7', 'ETA', 'ETA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3108 |  |  |         'ETI(AÄOÖÜU)-', 'EZI', 'EZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3109 |  |  |         'EUERE$', 'EUERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3110 |  |  |         'EUERE(NS)-$', 'EUERE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3111 |  |  |         'EUERE(AIOUY)--', 'EUER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3112 |  |  |         'EUER(AÄIOÖUÜY)-', 'EUER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3113 |  |  |         'EUER<', 'EUA', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3114 |  |  |         'EUEU--', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3115 |  |  |         'EUILLE$', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3116 |  |  |         'EUR$', 'ÖR', 'ÖR', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3117 |  |  |         'EUX', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3118 |  |  |         'EUSZ$', 'EUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3119 |  |  |         'EUTZ$', 'EUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3120 |  |  |         'EUYS$', 'EUS', 'EUZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3121 |  |  |         'EUZ$', 'EUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3122 |  |  |         'EU', 'EU', 'EU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3123 |  |  |         'EVER--<1', 'EW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3124 |  |  |         'EV(ÄOÖUÜ)-1', 'EW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3125 |  |  |         'EYER<', 'EIA', 'EIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3126 |  |  |         'EY<', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3127 |  |  |         'FACETTE', 'FASET', 'FAZET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3128 |  |  |         'FANS--^$', 'FE', 'FE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3129 |  |  |         'FAN-^$', 'FE', 'FE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3130 |  |  |         'FAULT-', 'FOL', 'FUL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3131 |  |  |         'FEE(DL)-', 'FI', 'FI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3132 |  |  |         'FEHLER', 'FELA', 'FELA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3133 |  |  |         'FE(LMNRST)-3^', 'FE', 'FE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3134 |  |  |         'FOERDERN---^', 'FÖRD', 'FÖRT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3135 |  |  |         'FOERDERN---', ' FÖRD', ' FÖRT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3136 |  |  |         'FOND7', 'FON', 'FUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3137 |  |  |         'FRAIN$', 'FRA', 'FRA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3138 |  |  |         'FRISEU(RS)-', 'FRISÖ', 'FRIZÖ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3139 |  |  |         'FY9^', 'FÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3140 |  |  |         'FÖRDERN---^', 'FÖRD', 'FÖRT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3141 |  |  |         'FÖRDERN---', ' FÖRD', ' FÖRT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3142 |  |  |         'GAGS^$', 'GEX', 'KEX', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3143 |  |  |         'GAG^$', 'GEK', 'KEK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3144 |  |  |         'GD', 'KT', 'KT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3145 |  |  |         'GEGEN^^', 'GEGN', 'KEKN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3146 |  |  |         'GEGENGEKOM-----', 'GEGN ', 'KEKN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3147 |  |  |         'GEGENGESET-----', 'GEGN ', 'KEKN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3148 |  |  |         'GEGENKOMME-----', 'GEGN ', 'KEKN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3149 |  |  |         'GEGENZUKOM---', 'GEGN ZU ', 'KEKN ZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3150 |  |  |         'GENDETWAS-----$', 'GENT ', 'KENT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3151 |  |  |         'GENRE', 'IORE', 'IURE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3152 |  |  |         'GE(LMNRST)-3^', 'GE', 'KE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3153 |  |  |         'GER(DKT)-', 'GER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3154 |  |  |         'GETTE$', 'GET', 'KET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3155 |  |  |         'GGF.', 'GF.', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3156 |  |  |         'GG-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3157 |  |  |         'GH', 'G', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3158 |  |  |         'GI(AOU)-^', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3159 |  |  |         'GION-3', 'KIO', 'KIU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3160 |  |  |         'G(CK)-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3161 |  |  |         'GJ(AEIOU)-^', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3162 |  |  |         'GMBH^$', 'GMBH', 'GMBH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3163 |  |  |         'GNAC$', 'NIAK', 'NIAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3164 |  |  |         'GNON$', 'NION', 'NIUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3165 |  |  |         'GN$', 'N', 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3166 |  |  |         'GONCAL-^', 'GONZA', 'KUNZA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3167 |  |  |         'GRY9^', 'GRÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3168 |  |  |         'G(SßXZ)-<', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3169 |  |  |         'GUCK-', 'KU', 'KU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3170 |  |  |         'GUISEP-^', 'IUSE', 'IUZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3171 |  |  |         'GUI-^', 'G', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3172 |  |  |         'GUTAUSSEH------^', 'GUT ', 'KUT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3173 |  |  |         'GUTGEHEND------^', 'GUT ', 'KUT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3174 |  |  |         'GY9^', 'GÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3175 |  |  |         'G(AÄEILOÖRUÜY)-', 'G', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3176 |  |  |         'G(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'G', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3177 |  |  |         'G\'S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3178 |  |  |         'G´S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3179 |  |  |         'G^', 'G', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3180 |  |  |         'G', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3181 |  |  |         'HA(HIUY)--1', 'H', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3182 |  |  |         'HANDVOL---^', 'HANT ', 'ANT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3183 |  |  |         'HANNOVE-^', 'HANOF', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3184 |  |  |         'HAVEN7$', 'HAFN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3185 |  |  |         'HEAD-', 'HE', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3186 |  |  |         'HELIEGEN------', 'E ', 'E ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3187 |  |  |         'HESTEHEN------', 'E ', 'E ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3188 |  |  |         'HE(LMNRST)-3^', 'HE', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3189 |  |  |         'HE(LMN)-1', 'E', 'E', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3190 |  |  |         'HEUR1$', 'ÖR', 'ÖR', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3191 |  |  |         'HE(HIUY)--1', 'H', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3192 |  |  |         'HIH(AÄEIOÖUÜY)-1', 'IH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3193 |  |  |         'HLH(AÄEIOÖUÜY)-1', 'LH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3194 |  |  |         'HMH(AÄEIOÖUÜY)-1', 'MH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3195 |  |  |         'HNH(AÄEIOÖUÜY)-1', 'NH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3196 |  |  |         'HOBBY9^', 'HOBI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3197 |  |  |         'HOCHBEGAB-----^', 'HOCH ', 'UK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3198 |  |  |         'HOCHTALEN-----^', 'HOCH ', 'UK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3199 |  |  |         'HOCHZUFRI-----^', 'HOCH ', 'UK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3200 |  |  |         'HO(HIY)--1', 'H', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3201 |  |  |         'HRH(AÄEIOÖUÜY)-1', 'RH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3202 |  |  |         'HUH(AÄEIOÖUÜY)-1', 'UH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3203 |  |  |         'HUIS^^', 'HÜS', 'IZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3204 |  |  |         'HUIS$', 'ÜS', 'IZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3205 |  |  |         'HUI--1', 'H', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3206 |  |  |         'HYGIEN^', 'HÜKIEN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3207 |  |  |         'HY9^', 'HÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3208 |  |  |         'HY(BDGMNPST)-', 'Ü', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3209 |  |  |         'H.^', None, 'H.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3210 |  |  |         'HÄU--1', 'H', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3211 |  |  |         'H^', 'H', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3212 |  |  |         'H', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3213 |  |  |         'ICHELL---', 'ISH', 'IZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3214 |  |  |         'ICHI$', 'ISHI', 'IZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3215 |  |  |         'IEC$', 'IZ', 'IZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3216 |  |  |         'IEDENSTELLE------', 'IDN ', 'ITN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3217 |  |  |         'IEI-3', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3218 |  |  |         'IELL3', 'IEL', 'IEL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3219 |  |  |         'IENNE$', 'IN', 'IN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3220 |  |  |         'IERRE$', 'IER', 'IER', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3221 |  |  |         'IERZULAN---', 'IR ZU ', 'IR ZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3222 |  |  |         'IETTE$', 'IT', 'IT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3223 |  |  |         'IEU', 'IÖ', 'IÖ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3224 |  |  |         'IE<4', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3225 |  |  |         'IGL-1', 'IK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3226 |  |  |         'IGHT3$', 'EIT', 'EIT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3227 |  |  |         'IGNI(EO)-', 'INI', 'INI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3228 |  |  |         'IGN(AEOU)-$', 'INI', 'INI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3229 |  |  |         'IHER(DGLKRT)--1', 'IHE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3230 |  |  |         'IHE(IUY)--', 'IH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3231 |  |  |         'IH(AIOÖUÜY)-', 'IH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3232 |  |  |         'IJ(AOU)-', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3233 |  |  |         'IJ$', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3234 |  |  |         'IJ<', 'EI', 'EI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3235 |  |  |         'IKOLE$', 'IKOL', 'IKUL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3236 |  |  |         'ILLAN(STZ)--4', 'ILIA', 'ILIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3237 |  |  |         'ILLAR(DT)--4', 'ILIA', 'ILIA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3238 |  |  |         'IMSTAN----^', 'IM ', 'IN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3239 |  |  |         'INDELERREGE------', 'INDL ', 'INTL ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3240 |  |  |         'INFRAGE-----^$', 'IN ', 'IN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3241 |  |  |         'INTERN(AOU)-^', 'INTAN', 'INTAN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3242 |  |  |         'INVER-', 'INWE', 'INFE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3243 |  |  |         'ITI(AÄIOÖUÜ)-', 'IZI', 'IZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3244 |  |  |         'IUSZ$', 'IUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3245 |  |  |         'IUTZ$', 'IUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3246 |  |  |         'IUZ$', 'IUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3247 |  |  |         'IVER--<', 'IW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3248 |  |  |         'IVIER$', 'IWIE', 'IFIE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3249 |  |  |         'IV(ÄOÖUÜ)-', 'IW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3250 |  |  |         'IV<3', 'IW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3251 |  |  |         'IY2', 'I', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3252 |  |  |         'I(ÈÉÊ)<4', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3253 |  |  |         'JAVIE---<^', 'ZA', 'ZA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3254 |  |  |         'JEANS^$', 'JINS', 'INZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3255 |  |  |         'JEANNE^$', 'IAN', 'IAN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3256 |  |  |         'JEAN-^', 'IA', 'IA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3257 |  |  |         'JER-^', 'IE', 'IE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3258 |  |  |         'JE(LMNST)-', 'IE', 'IE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3259 |  |  |         'JI^', 'JI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3260 |  |  |         'JOR(GK)^$', 'IÖRK', 'IÖRK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3261 |  |  |         'J', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3262 |  |  |         'KC(ÄEIJ)-', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3263 |  |  |         'KD', 'KT', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3264 |  |  |         'KE(LMNRST)-3^', 'KE', 'KE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3265 |  |  |         'KG(AÄEILOÖRUÜY)-', 'K', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3266 |  |  |         'KH<^', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3267 |  |  |         'KIC$', 'KIZ', 'KIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3268 |  |  |         'KLE(LMNRST)-3^', 'KLE', 'KLE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3269 |  |  |         'KOTELE-^', 'KOTL', 'KUTL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3270 |  |  |         'KREAT-^', 'KREA', 'KREA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3271 |  |  |         'KRÜS(TZ)--^', 'KRI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3272 |  |  |         'KRYS(TZ)--^', 'KRI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3273 |  |  |         'KRY9^', 'KRÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3274 |  |  |         'KSCH---', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3275 |  |  |         'KSH--', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3276 |  |  |         'K(SßXZ)7', 'X', 'X',  # implies 'KST' -> 'XT' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3277 |  |  |         'KT\'S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3278 |  |  |         'KTI(AIOU)-3', 'XI', 'XI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3279 |  |  |         'KT(SßXZ)', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3280 |  |  |         'KY9^', 'KÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3281 |  |  |         'K\'S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3282 |  |  |         'K´S$', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3283 |  |  |         'LANGES$', ' LANGES', ' LANKEZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3284 |  |  |         'LANGE$', ' LANGE', ' LANKE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3285 |  |  |         'LANG$', ' LANK', ' LANK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3286 |  |  |         'LARVE-', 'LARF', 'LARF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3287 |  |  |         'LD(SßZ)$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3288 |  |  |         'LD\'S$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3289 |  |  |         'LD´S$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3290 |  |  |         'LEAND-^', 'LEAN', 'LEAN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3291 |  |  |         'LEERSTEHE-----^', 'LER ', 'LER ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3292 |  |  |         'LEICHBLEIB-----', 'LEICH ', 'LEIK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3293 |  |  |         'LEICHLAUTE-----', 'LEICH ', 'LEIK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3294 |  |  |         'LEIDERREGE------', 'LEIT ', 'LEIT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3295 |  |  |         'LEIDGEPR----^', 'LEIT ', 'LEIT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3296 |  |  |         'LEINSTEHE-----', 'LEIN ', 'LEIN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3297 |  |  |         'LEL-', 'LE', 'LE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3298 |  |  |         'LE(MNRST)-3^', 'LE', 'LE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3299 |  |  |         'LETTE$', 'LET', 'LET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3300 |  |  |         'LFGNAG-', 'LFGAN', 'LFKAN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3301 |  |  |         'LICHERWEIS----', 'LICHA ', 'LIKA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3302 |  |  |         'LIC$', 'LIZ', 'LIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3303 |  |  |         'LIVE^$', 'LEIF', 'LEIF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3304 |  |  |         'LT(SßZ)$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3305 |  |  |         'LT\'S$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3306 |  |  |         'LT´S$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3307 |  |  |         'LUI(GS)--', 'LU', 'LU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3308 |  |  |         'LV(AIO)-', 'LW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3309 |  |  |         'LY9^', 'LÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3310 |  |  |         'LSTS$', 'LS', 'LZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3311 |  |  |         'LZ(BDFGKLMNPQRSTVWX)-', 'LS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3312 |  |  |         'L(SßZ)$', 'LS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3313 |  |  |         'MAIR-<', 'MEI', 'NEI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3314 |  |  |         'MANAG-', 'MENE', 'NENE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3315 |  |  |         'MANUEL', 'MANUEL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3316 |  |  |         'MASSEU(RS)-', 'MASÖ', 'NAZÖ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3317 |  |  |         'MATCH', 'MESH', 'NEZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3318 |  |  |         'MAURICE', 'MORIS', 'NURIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3319 |  |  |         'MBH^$', 'MBH', 'MBH', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3320 |  |  |         'MB(ßZ)$', 'MS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3321 |  |  |         'MB(SßTZ)-', 'M', 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3322 |  |  |         'MCG9^', 'MAK', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3323 |  |  |         'MC9^', 'MAK', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3324 |  |  |         'MEMOIR-^', 'MEMOA', 'NENUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3325 |  |  |         'MERHAVEN$', 'MAHAFN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3326 |  |  |         'ME(LMNRST)-3^', 'ME', 'NE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3327 |  |  |         'MEN(STZ)--3', 'ME', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3328 |  |  |         'MEN$', 'MEN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3329 |  |  |         'MIGUEL-', 'MIGE', 'NIKE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3330 |  |  |         'MIKE^$', 'MEIK', 'NEIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3331 |  |  |         'MITHILFE----^$', 'MIT H', 'NIT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3332 |  |  |         'MN$', 'M', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3333 |  |  |         'MN', 'N', 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3334 |  |  |         'MPJUTE-', 'MPUT', 'NBUT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3335 |  |  |         'MP(ßZ)$', 'MS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3336 |  |  |         'MP(SßTZ)-', 'M', 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3337 |  |  |         'MP(BDJLMNPQVW)-', 'MB', 'NB', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3338 |  |  |         'MY9^', 'MÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3339 |  |  |         'M(ßZ)$', 'MS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3340 |  |  |         'M´G7^', 'MAK', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3341 |  |  |         'M\'G7^', 'MAK', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3342 |  |  |         'M´^', 'MAK', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3343 |  |  |         'M\'^', 'MAK', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3344 |  |  |         'M', None, 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3345 |  |  |         'NACH^^', 'NACH', 'NAK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3346 |  |  |         'NADINE', 'NADIN', 'NATIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3347 |  |  |         'NAIV--', 'NA', 'NA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3348 |  |  |         'NAISE$', 'NESE', 'NEZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3349 |  |  |         'NAUGENOMM------', 'NAU ', 'NAU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3350 |  |  |         'NAUSOGUT$', 'NAUSO GUT', 'NAUZU KUT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3351 |  |  |         'NCH$', 'NSH', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3352 |  |  |         'NCOISE$', 'SOA', 'ZUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3353 |  |  |         'NCOIS$', 'SOA', 'ZUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3354 |  |  |         'NDAR$', 'NDA', 'NTA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3355 |  |  |         'NDERINGEN------', 'NDE ', 'NTE ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3356 |  |  |         'NDRO(CDKTZ)-', 'NTRO', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3357 |  |  |         'ND(BFGJLMNPQVW)-', 'NT', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3358 |  |  |         'ND(SßZ)$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3359 |  |  |         'ND\'S$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3360 |  |  |         'ND´S$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3361 |  |  |         'NEBEN^^', 'NEBN', 'NEBN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3362 |  |  |         'NENGELERN------', 'NEN ', 'NEN ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3363 |  |  |         'NENLERN(ET)---', 'NEN LE', 'NEN LE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3364 |  |  |         'NENZULERNE---', 'NEN ZU LE', 'NEN ZU LE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3365 |  |  |         'NE(LMNRST)-3^', 'NE', 'NE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3366 |  |  |         'NEN-3', 'NE', 'NE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3367 |  |  |         'NETTE$', 'NET', 'NET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3368 |  |  |         'NGU^^', 'NU', 'NU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3369 |  |  |         'NG(BDFJLMNPQRTVW)-', 'NK', 'NK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3370 |  |  |         'NH(AUO)-$', 'NI', 'NI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3371 |  |  |         'NICHTSAHNEN-----', 'NIX ', 'NIX ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3372 |  |  |         'NICHTSSAGE----', 'NIX ', 'NIX ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3373 |  |  |         'NICHTS^^', 'NIX', 'NIX', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3374 |  |  |         'NICHT^^', 'NICHT', 'NIKT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3375 |  |  |         'NINE$', 'NIN', 'NIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3376 |  |  |         'NON^^', 'NON', 'NUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3377 |  |  |         'NOTLEIDE-----^', 'NOT ', 'NUT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3378 |  |  |         'NOT^^', 'NOT', 'NUT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3379 |  |  |         'NTI(AIOU)-3', 'NZI', 'NZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3380 |  |  |         'NTIEL--3', 'NZI', 'NZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3381 |  |  |         'NT(SßZ)$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3382 |  |  |         'NT\'S$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3383 |  |  |         'NT´S$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3384 |  |  |         'NYLON', 'NEILON', 'NEILUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3385 |  |  |         'NY9^', 'NÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3386 |  |  |         'NSTZUNEH---', 'NST ZU ', 'NZT ZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3387 |  |  |         'NSZ-', 'NS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3388 |  |  |         'NSTS$', 'NS', 'NZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3389 |  |  |         'NZ(BDFGKLMNPQRSTVWX)-', 'NS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3390 |  |  |         'N(SßZ)$', 'NS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3391 |  |  |         'OBERE-', 'OBER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3392 |  |  |         'OBER^^', 'OBA', 'UBA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3393 |  |  |         'OEU2', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3394 |  |  |         'OE<2', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3395 |  |  |         'OGL-', 'OK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3396 |  |  |         'OGNIE-', 'ONI', 'UNI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3397 |  |  |         'OGN(AEOU)-$', 'ONI', 'UNI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3398 |  |  |         'OH(AIOÖUÜY)-', 'OH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3399 |  |  |         'OIE$', 'Ö', 'Ö', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3400 |  |  |         'OIRE$', 'OA', 'UA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3401 |  |  |         'OIR$', 'OA', 'UA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3402 |  |  |         'OIX', 'OA', 'UA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3403 |  |  |         'OI<3', 'EU', 'EU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3404 |  |  |         'OKAY^$', 'OKE', 'UKE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3405 |  |  |         'OLYN$', 'OLIN', 'ULIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3406 |  |  |         'OO(DLMZ)-', 'U', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3407 |  |  |         'OO$', 'U', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3408 |  |  |         'OO-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3409 |  |  |         'ORGINAL-----', 'ORI', 'URI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3410 |  |  |         'OTI(AÄOÖUÜ)-', 'OZI', 'UZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3411 |  |  |         'OUI^', 'WI', 'FI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3412 |  |  |         'OUILLE$', 'ULIE', 'ULIE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3413 |  |  |         'OU(DT)-^', 'AU', 'AU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3414 |  |  |         'OUSE$', 'AUS', 'AUZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3415 |  |  |         'OUT-', 'AU', 'AU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3416 |  |  |         'OU', 'U', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3417 |  |  |         'O(FV)$', 'AU', 'AU',  # due to 'OW$' -> 'AU' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3418 |  |  |         'OVER--<', 'OW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3419 |  |  |         'OV(AOU)-', 'OW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3420 |  |  |         'OW$', 'AU', 'AU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3421 |  |  |         'OWS$', 'OS', 'UZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3422 |  |  |         'OJ(AÄEIOÖUÜ)--', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3423 |  |  |         'OYER', 'OIA', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3424 |  |  |         'OY(AÄEIOÖUÜ)--', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3425 |  |  |         'O(JY)<', 'EU', 'EU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3426 |  |  |         'OZ$', 'OS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3427 |  |  |         'O´^', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3428 |  |  |         'O\'^', 'O', 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3429 |  |  |         'O', None, 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3430 |  |  |         'PATIEN--^', 'PAZI', 'PAZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3431 |  |  |         'PENSIO-^', 'PANSI', 'PANZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3432 |  |  |         'PE(LMNRST)-3^', 'PE', 'PE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3433 |  |  |         'PFER-^', 'FE', 'FE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3434 |  |  |         'P(FH)<', 'F', 'F', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3435 |  |  |         'PIC^$', 'PIK', 'PIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3436 |  |  |         'PIC$', 'PIZ', 'PIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3437 |  |  |         'PIPELINE', 'PEIBLEIN', 'PEIBLEIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3438 |  |  |         'POLYP-', 'POLÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3439 |  |  |         'POLY^^', 'POLI', 'PULI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3440 |  |  |         'PORTRAIT7', 'PORTRE', 'PURTRE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3441 |  |  |         'POWER7', 'PAUA', 'PAUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3442 |  |  |         'PP(FH)--<', 'B', 'B', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3443 |  |  |         'PP-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3444 |  |  |         'PRODUZ-^', 'PRODU', 'BRUTU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3445 |  |  |         'PRODUZI--', ' PRODU', ' BRUTU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3446 |  |  |         'PRIX^$', 'PRI', 'PRI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3447 |  |  |         'PS-^^', 'P', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3448 |  |  |         'P(SßZ)^', None, 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3449 |  |  |         'P(SßZ)$', 'BS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3450 |  |  |         'PT-^', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3451 |  |  |         'PTI(AÄOÖUÜ)-3', 'BZI', 'BZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3452 |  |  |         'PY9^', 'PÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3453 |  |  |         'P(AÄEIOÖRUÜY)-', 'P', 'P', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3454 |  |  |         'P(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'P', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3455 |  |  |         'P.^', None, 'P.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3456 |  |  |         'P^', 'P', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3457 |  |  |         'P', 'B', 'B', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3458 |  |  |         'QI-', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3459 |  |  |         'QUARANT--', 'KARA', 'KARA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3460 |  |  |         'QUE(LMNRST)-3', 'KWE', 'KFE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3461 |  |  |         'QUE$', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3462 |  |  |         'QUI(NS)$', 'KI', 'KI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3463 |  |  |         'QUIZ7', 'KWIS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3464 |  |  |         'Q(UV)7', 'KW', 'KF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3465 |  |  |         'Q<', 'K', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3466 |  |  |         'RADFAHR----', 'RAT ', 'RAT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3467 |  |  |         'RAEFTEZEHRE-----', 'REFTE ', 'REFTE ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3468 |  |  |         'RCH', 'RCH', 'RK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3469 |  |  |         'REA(DU)---3^', 'R', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3470 |  |  |         'REBSERZEUG------', 'REBS ', 'REBZ ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3471 |  |  |         'RECHERCH^', 'RESHASH', 'REZAZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3472 |  |  |         'RECYCL--', 'RIZEI', 'RIZEI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3473 |  |  |         'RE(ALST)-3^', 'RE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3474 |  |  |         'REE$', 'RI', 'RI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3475 |  |  |         'RER$', 'RA', 'RA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3476 |  |  |         'RE(MNR)-4', 'RE', 'RE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3477 |  |  |         'RETTE$', 'RET', 'RET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3478 |  |  |         'REUZ$', 'REUZ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3479 |  |  |         'REW$', 'RU', 'RU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3480 |  |  |         'RH<^', 'R', 'R', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3481 |  |  |         'RJA(MN)--', 'RI', 'RI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3482 |  |  |         'ROWD-^', 'RAU', 'RAU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3483 |  |  |         'RTEMONNAIE-', 'RTMON', 'RTNUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3484 |  |  |         'RTI(AÄOÖUÜ)-3', 'RZI', 'RZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3485 |  |  |         'RTIEL--3', 'RZI', 'RZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3486 |  |  |         'RV(AEOU)-3', 'RW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3487 |  |  |         'RY(KN)-$', 'RI', 'RI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3488 |  |  |         'RY9^', 'RÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3489 |  |  |         'RÄFTEZEHRE-----', 'REFTE ', 'REFTE ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3490 |  |  |         'SAISO-^', 'SES', 'ZEZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3491 |  |  |         'SAFE^$', 'SEIF', 'ZEIF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3492 |  |  |         'SAUCE-^', 'SOS', 'ZUZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3493 |  |  |         'SCHLAGGEBEN-----<', 'SHLAK ', 'ZLAK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3494 |  |  |         'SCHSCH---7', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3495 |  |  |         'SCHTSCH', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3496 |  |  |         'SC(HZ)<', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3497 |  |  |         'SC', 'SK', 'ZK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3498 |  |  |         'SELBSTST--7^^', 'SELB', 'ZELB', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3499 |  |  |         'SELBST7^^', 'SELBST', 'ZELBZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3500 |  |  |         'SERVICE7^', 'SÖRWIS', 'ZÖRFIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3501 |  |  |         'SERVI-^', 'SERW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3502 |  |  |         'SE(LMNRST)-3^', 'SE', 'ZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3503 |  |  |         'SETTE$', 'SET', 'ZET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3504 |  |  |         'SHP-^', 'S', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3505 |  |  |         'SHST', 'SHT', 'ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3506 |  |  |         'SHTSH', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3507 |  |  |         'SHT', 'ST', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3508 |  |  |         'SHY9^', 'SHÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3509 |  |  |         'SH^^', 'SH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3510 |  |  |         'SH3', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3511 |  |  |         'SICHERGEGAN-----^', 'SICHA ', 'ZIKA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3512 |  |  |         'SICHERGEHE----^', 'SICHA ', 'ZIKA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3513 |  |  |         'SICHERGESTEL------^', 'SICHA ', 'ZIKA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3514 |  |  |         'SICHERSTELL-----^', 'SICHA ', 'ZIKA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3515 |  |  |         'SICHERZU(GS)--^', 'SICHA ZU ', 'ZIKA ZU ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3516 |  |  |         'SIEGLI-^', 'SIKL', 'ZIKL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3517 |  |  |         'SIGLI-^', 'SIKL', 'ZIKL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3518 |  |  |         'SIGHT', 'SEIT', 'ZEIT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3519 |  |  |         'SIGN', 'SEIN', 'ZEIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3520 |  |  |         'SKI(NPZ)-', 'SKI', 'ZKI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3521 |  |  |         'SKI<^', 'SHI', 'ZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3522 |  |  |         'SODASS^$', 'SO DAS', 'ZU TAZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3523 |  |  |         'SODAß^$', 'SO DAS', 'ZU TAZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3524 |  |  |         'SOGENAN--^', 'SO GEN', 'ZU KEN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3525 |  |  |         'SOUND-', 'SAUN', 'ZAUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3526 |  |  |         'STAATS^^', 'STAZ', 'ZTAZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3527 |  |  |         'STADT^^', 'STAT', 'ZTAT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3528 |  |  |         'STANDE$', ' STANDE', ' ZTANTE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3529 |  |  |         'START^^', 'START', 'ZTART', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3530 |  |  |         'STAURANT7', 'STORAN', 'ZTURAN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3531 |  |  |         'STEAK-', 'STE', 'ZTE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3532 |  |  |         'STEPHEN-^$', 'STEW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3533 |  |  |         'STERN', 'STERN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3534 |  |  |         'STRAF^^', 'STRAF', 'ZTRAF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3535 |  |  |         'ST\'S$', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3536 |  |  |         'ST´S$', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3537 |  |  |         'STST--', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3538 |  |  |         'STS(ACEÈÉÊHIÌÍÎOUÄÜÖ)--', 'ST', 'ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3539 |  |  |         'ST(SZ)', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3540 |  |  |         'SPAREN---^', 'SPA', 'ZPA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3541 |  |  |         'SPAREND----', ' SPA', ' ZPA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3542 |  |  |         'S(PTW)-^^', 'S', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3543 |  |  |         'SP', 'SP', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3544 |  |  |         'STYN(AE)-$', 'STIN', 'ZTIN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3545 |  |  |         'ST', 'ST', 'ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3546 |  |  |         'SUITE<', 'SIUT', 'ZIUT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3547 |  |  |         'SUKE--$', 'S', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3548 |  |  |         'SURF(EI)-', 'SÖRF', 'ZÖRF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3549 |  |  |         'SV(AEÈÉÊIÌÍÎOU)-<^', 'SW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3550 |  |  |         'SYB(IY)--^', 'SIB', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3551 |  |  |         'SYL(KVW)--^', 'SI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3552 |  |  |         'SY9^', 'SÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3553 |  |  |         'SZE(NPT)-^', 'ZE', 'ZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3554 |  |  |         'SZI(ELN)-^', 'ZI', 'ZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3555 |  |  |         'SZCZ<', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3556 |  |  |         'SZT<', 'ST', 'ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3557 |  |  |         'SZ<3', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3558 |  |  |         'SÜL(KVW)--^', 'SI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3559 |  |  |         'S', None, 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3560 |  |  |         'TCH', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3561 |  |  |         'TD(AÄEIOÖRUÜY)-', 'T', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3562 |  |  |         'TD(ÀÁÂÃÅÈÉÊËÌÍÎÏÒÓÔÕØÙÚÛÝŸ)-', 'T', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3563 |  |  |         'TEAT-^', 'TEA', 'TEA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3564 |  |  |         'TERRAI7^', 'TERA', 'TERA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3565 |  |  |         'TE(LMNRST)-3^', 'TE', 'TE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3566 |  |  |         'TH<', 'T', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3567 |  |  |         'TICHT-', 'TIK', 'TIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3568 |  |  |         'TICH$', 'TIK', 'TIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3569 |  |  |         'TIC$', 'TIZ', 'TIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3570 |  |  |         'TIGGESTELL-------', 'TIK ', 'TIK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3571 |  |  |         'TIGSTELL-----', 'TIK ', 'TIK ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3572 |  |  |         'TOAS-^', 'TO', 'TU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3573 |  |  |         'TOILET-', 'TOLE', 'TULE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3574 |  |  |         'TOIN-', 'TOA', 'TUA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3575 |  |  |         'TRAECHTI-^', 'TRECHT', 'TREKT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3576 |  |  |         'TRAECHTIG--', ' TRECHT', ' TREKT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3577 |  |  |         'TRAINI-', 'TREN', 'TREN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3578 |  |  |         'TRÄCHTI-^', 'TRECHT', 'TREKT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3579 |  |  |         'TRÄCHTIG--', ' TRECHT', ' TREKT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3580 |  |  |         'TSCH', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3581 |  |  |         'TSH', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3582 |  |  |         'TST', 'ZT', 'ZT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3583 |  |  |         'T(Sß)', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3584 |  |  |         'TT(SZ)--<', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3585 |  |  |         'TT9', 'T', 'T', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3586 |  |  |         'TV^$', 'TV', 'TV', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3587 |  |  |         'TX(AEIOU)-3', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3588 |  |  |         'TY9^', 'TÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3589 |  |  |         'TZ-', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3590 |  |  |         'T\'S3$', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3591 |  |  |         'T´S3$', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3592 |  |  |         'UEBEL(GNRW)-^^', 'ÜBL ', 'IBL ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3593 |  |  |         'UEBER^^', 'ÜBA', 'IBA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3594 |  |  |         'UE2', 'Ü', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3595 |  |  |         'UGL-', 'UK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3596 |  |  |         'UH(AOÖUÜY)-', 'UH', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3597 |  |  |         'UIE$', 'Ü', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3598 |  |  |         'UM^^', 'UM', 'UN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3599 |  |  |         'UNTERE--3', 'UNTE', 'UNTE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3600 |  |  |         'UNTER^^', 'UNTA', 'UNTA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3601 |  |  |         'UNVER^^', 'UNFA', 'UNFA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3602 |  |  |         'UN^^', 'UN', 'UN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3603 |  |  |         'UTI(AÄOÖUÜ)-', 'UZI', 'UZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3604 |  |  |         'UVE-4', 'UW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3605 |  |  |         'UY2', 'UI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3606 |  |  |         'UZZ', 'AS', 'AZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3607 |  |  |         'VACL-^', 'WAZ', 'FAZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3608 |  |  |         'VAC$', 'WAZ', 'FAZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3609 |  |  |         'VAN DEN ^', 'FANDN', 'FANTN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3610 |  |  |         'VANES-^', 'WANE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3611 |  |  |         'VATRO-', 'WATR', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3612 |  |  |         'VA(DHJNT)--^', 'F', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3613 |  |  |         'VEDD-^', 'FE', 'FE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3614 |  |  |         'VE(BEHIU)--^', 'F', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3615 |  |  |         'VEL(BDLMNT)-^', 'FEL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3616 |  |  |         'VENTZ-^', 'FEN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3617 |  |  |         'VEN(NRSZ)-^', 'FEN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3618 |  |  |         'VER(AB)-^$', 'WER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3619 |  |  |         'VERBAL^$', 'WERBAL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3620 |  |  |         'VERBAL(EINS)-^', 'WERBAL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3621 |  |  |         'VERTEBR--', 'WERTE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3622 |  |  |         'VEREIN-----', 'F', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3623 |  |  |         'VEREN(AEIOU)-^', 'WEREN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3624 |  |  |         'VERIFI', 'WERIFI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3625 |  |  |         'VERON(AEIOU)-^', 'WERON', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3626 |  |  |         'VERSEN^', 'FERSN', 'FAZN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3627 |  |  |         'VERSIERT--^', 'WERSI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3628 |  |  |         'VERSIO--^', 'WERS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3629 |  |  |         'VERSUS', 'WERSUS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3630 |  |  |         'VERTI(GK)-', 'WERTI', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3631 |  |  |         'VER^^', 'FER', 'FA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3632 |  |  |         'VERSPRECHE-------', ' FER', ' FA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3633 |  |  |         'VER$', 'WA', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3634 |  |  |         'VER', 'FA', 'FA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3635 |  |  |         'VET(HT)-^', 'FET', 'FET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3636 |  |  |         'VETTE$', 'WET', 'FET', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3637 |  |  |         'VE^', 'WE', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3638 |  |  |         'VIC$', 'WIZ', 'FIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3639 |  |  |         'VIELSAGE----', 'FIL ', 'FIL ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3640 |  |  |         'VIEL', 'FIL', 'FIL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3641 |  |  |         'VIEW', 'WIU', 'FIU', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3642 |  |  |         'VILL(AE)-', 'WIL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3643 |  |  |         'VIS(ACEIKUVWZ)-<^', 'WIS', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3644 |  |  |         'VI(ELS)--^', 'F', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3645 |  |  |         'VILLON--', 'WILI', 'FILI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3646 |  |  |         'VIZE^^', 'FIZE', 'FIZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3647 |  |  |         'VLIE--^', 'FL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3648 |  |  |         'VL(AEIOU)--', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3649 |  |  |         'VOKA-^', 'WOK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3650 |  |  |         'VOL(ATUVW)--^', 'WO', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3651 |  |  |         'VOR^^', 'FOR', 'FUR', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3652 |  |  |         'VR(AEIOU)--', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3653 |  |  |         'VV9', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3654 |  |  |         'VY9^', 'WÜ', 'FI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3655 |  |  |         'V(ÜY)-', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3656 |  |  |         'V(ÀÁÂÃÅÈÉÊÌÍÎÙÚÛ)-', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3657 |  |  |         'V(AEIJLRU)-<', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3658 |  |  |         'V.^', 'V.', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3659 |  |  |         'V<', 'F', 'F', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3660 |  |  |         'WEITERENTWI-----^', 'WEITA ', 'FEITA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3661 |  |  |         'WEITREICH-----^', 'WEIT ', 'FEIT ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3662 |  |  |         'WEITVER^', 'WEIT FER', 'FEIT FA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3663 |  |  |         'WE(LMNRST)-3^', 'WE', 'FE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3664 |  |  |         'WER(DST)-', 'WER', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3665 |  |  |         'WIC$', 'WIZ', 'FIZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3666 |  |  |         'WIEDERU--', 'WIDE', 'FITE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3667 |  |  |         'WIEDER^$', 'WIDA', 'FITA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3668 |  |  |         'WIEDER^^', 'WIDA ', 'FITA ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3669 |  |  |         'WIEVIEL', 'WI FIL', 'FI FIL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3670 |  |  |         'WISUEL', 'WISUEL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3671 |  |  |         'WR-^', 'W', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3672 |  |  |         'WY9^', 'WÜ', 'FI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3673 |  |  |         'W(BDFGJKLMNPQRSTZ)-', 'F', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3674 |  |  |         'W$', 'F', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3675 |  |  |         'W', None, 'F', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3676 |  |  |         'X<^', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3677 |  |  |         'XHAVEN$', 'XAFN', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3678 |  |  |         'X(CSZ)', 'X', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3679 |  |  |         'XTS(CH)--', 'XT', 'XT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3680 |  |  |         'XT(SZ)', 'Z', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3681 |  |  |         'YE(LMNRST)-3^', 'IE', 'IE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3682 |  |  |         'YE-3', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3683 |  |  |         'YOR(GK)^$', 'IÖRK', 'IÖRK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3684 |  |  |         'Y(AOU)-<7', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3685 |  |  |         'Y(BKLMNPRSTX)-1', 'Ü', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3686 |  |  |         'YVES^$', 'IF', 'IF', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3687 |  |  |         'YVONNE^$', 'IWON', 'IFUN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3688 |  |  |         'Y.^', 'Y.', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3689 |  |  |         'Y', 'I', 'I', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3690 |  |  |         'ZC(AOU)-', 'SK', 'ZK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3691 |  |  |         'ZE(LMNRST)-3^', 'ZE', 'ZE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3692 |  |  |         'ZIEJ$', 'ZI', 'ZI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3693 |  |  |         'ZIGERJA(HR)-3', 'ZIGA IA', 'ZIKA IA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3694 |  |  |         'ZL(AEIOU)-', 'SL', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3695 |  |  |         'ZS(CHT)--', '', '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3696 |  |  |         'ZS', 'SH', 'Z', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3697 |  |  |         'ZUERST', 'ZUERST', 'ZUERST', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3698 |  |  |         'ZUGRUNDE^$', 'ZU GRUNDE', 'ZU KRUNTE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3699 |  |  |         'ZUGRUNDE', 'ZU GRUNDE ', 'ZU KRUNTE ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3700 |  |  |         'ZUGUNSTEN', 'ZU GUNSTN', 'ZU KUNZTN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3701 |  |  |         'ZUHAUSE-', 'ZU HAUS', 'ZU AUZ', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3702 |  |  |         'ZULASTEN^$', 'ZU LASTN', 'ZU LAZTN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3703 |  |  |         'ZURUECK^^', 'ZURÜK', 'ZURIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3704 |  |  |         'ZURZEIT', 'ZUR ZEIT', 'ZUR ZEIT', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3705 |  |  |         'ZURÜCK^^', 'ZURÜK', 'ZURIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3706 |  |  |         'ZUSTANDE', 'ZU STANDE', 'ZU ZTANTE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3707 |  |  |         'ZUTAGE', 'ZU TAGE', 'ZU TAKE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3708 |  |  |         'ZUVER^^', 'ZUFA', 'ZUFA', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3709 |  |  |         'ZUVIEL', 'ZU FIL', 'ZU FIL', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3710 |  |  |         'ZUWENIG', 'ZU WENIK', 'ZU FENIK', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3711 |  |  |         'ZY9^', 'ZÜ', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3712 |  |  |         'ZYK3$', 'ZIK', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3713 |  |  |         'Z(VW)7^', 'SW', None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3714 |  |  |         None, None, None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3715 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3716 |  |  |     phonet_hash = Counter() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3717 |  |  |     alpha_pos = Counter() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3718 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3719 |  |  |     phonet_hash_1 = Counter() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3720 |  |  |     phonet_hash_2 = Counter() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3721 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3722 |  |  |     _phonet_upper_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3723 |  |  |                                           'abcdefghijklmnopqrstuvwxyzàáâãåäæ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3724 |  |  |                                           'çðèéêëìíîïñòóôõöøœšßþùúûüýÿ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3725 |  |  |                                          'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÅÄÆ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3726 |  |  |                                          'ÇÐÈÉÊËÌÍÎÏÑÒÓÔÕÖØŒŠßÞÙÚÛÜÝŸ')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3727 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3728 |  |  |     def _trinfo(text, rule, err_text, lang): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3729 |  |  |         """Output debug information.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3730 |  |  |         if lang == 'none': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3731 |  |  |             _phonet_rules = _phonet_rules_no_lang | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3732 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3733 |  |  |             _phonet_rules = _phonet_rules_german | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3734 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3735 |  |  |         from_rule = ('(NULL)' if _phonet_rules[rule] is None else | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3736 |  |  |                      _phonet_rules[rule]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3737 |  |  |         to_rule1 = ('(NULL)' if (_phonet_rules[rule + 1] is None) else | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3738 |  |  |                     _phonet_rules[rule + 1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3739 |  |  |         to_rule2 = ('(NULL)' if (_phonet_rules[rule + 2] is None) else | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3740 |  |  |                     _phonet_rules[rule + 2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3741 |  |  |         print('"{} {}:  "{}"{}"{}" {}'.format(text, ((rule / 3) + 1), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3742 |  |  |                                               from_rule, to_rule1, to_rule2, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3743 |  |  |                                               err_text)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3744 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3745 |  |  |     def _initialize_phonet(lang): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3746 |  |  |         """Initialize phonet variables.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3747 |  |  |         if lang == 'none': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3748 |  |  |             _phonet_rules = _phonet_rules_no_lang | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3749 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3750 |  |  |             _phonet_rules = _phonet_rules_german | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3751 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3752 |  |  |         phonet_hash[''] = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3753 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3754 |  |  |         # German and international umlauts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3755 |  |  |         for j in {'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3756 |  |  |                   'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ø', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3757 |  |  |                   'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 'Œ', 'Š', 'Ÿ'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3758 |  |  |             alpha_pos[j] = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3759 |  |  |             phonet_hash[j] = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3760 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3761 |  |  |         # "normal" letters ('A'-'Z') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3762 |  |  |         for i, j in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3763 |  |  |             alpha_pos[j] = i + 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3764 |  |  |             phonet_hash[j] = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3765 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3766 |  |  |         for i in range(26): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3767 |  |  |             for j in range(28): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3768 |  |  |                 phonet_hash_1[i, j] = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3769 |  |  |                 phonet_hash_2[i, j] = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3770 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3771 |  |  |         # for each phonetic rule | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3772 |  |  |         for i in range(len(_phonet_rules)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3773 |  |  |             rule = _phonet_rules[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3774 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3775 |  |  |             if rule and i % 3 == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3776 |  |  |                 # calculate first hash value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3777 |  |  |                 k = _phonet_rules[i][0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3778 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3779 |  |  |                 if phonet_hash[k] < 0 and (_phonet_rules[i+1] or | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3780 |  |  |                                            _phonet_rules[i+2]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3781 |  |  |                     phonet_hash[k] = i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3782 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3783 |  |  |                 # calculate second hash values | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3784 |  |  |                 if k and alpha_pos[k] >= 2: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3785 |  |  |                     k = alpha_pos[k] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3786 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3787 |  |  |                     j = k-2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3788 |  |  |                     rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3789 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3790 |  |  |                     if not rule: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3791 |  |  |                         rule = ' ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3792 |  |  |                     elif rule[0] == '(': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3793 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3794 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3795 |  |  |                         rule = rule[0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3796 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3797 |  |  |                     while rule and (rule[0] != ')'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3798 |  |  |                         k = alpha_pos[rule[0]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3799 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3800 |  |  |                         if k > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3801 |  |  |                             # add hash value for this letter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3802 |  |  |                             if phonet_hash_1[j, k] < 0: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3803 |  |  |                                 phonet_hash_1[j, k] = i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3804 |  |  |                                 phonet_hash_2[j, k] = i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3805 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3806 |  |  |                             if phonet_hash_2[j, k] >= (i-30): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3807 |  |  |                                 phonet_hash_2[j, k] = i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3808 |  |  |                             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3809 |  |  |                                 k = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3810 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3811 |  |  |                         if k <= 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3812 |  |  |                             # add hash value for all letters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3813 |  |  |                             if phonet_hash_1[j, 0] < 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3814 |  |  |                                 phonet_hash_1[j, 0] = i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3815 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3816 |  |  |                             phonet_hash_2[j, 0] = i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3817 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3818 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3819 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3820 |  |  |     def _phonet(term, mode, lang, trace): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3821 |  |  |         """Return the phonet coded form of a term.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3822 |  |  |         if lang == 'none': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3823 |  |  |             _phonet_rules = _phonet_rules_no_lang | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3824 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3825 |  |  |             _phonet_rules = _phonet_rules_german | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3826 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3827 |  |  |         char0 = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3828 |  |  |         dest = term | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3829 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3830 |  |  |         if not term: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3831 |  |  |             return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3832 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3833 |  |  |         term_length = len(term) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3834 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3835 |  |  |         # convert input string to upper-case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3836 |  |  |         src = term.translate(_phonet_upper_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3837 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3838 |  |  |         # check "src" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3839 |  |  |         i = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3840 |  |  |         j = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3841 |  |  |         zeta = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3842 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3843 |  |  |         while i < len(src): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3844 |  |  |             char = src[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3845 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3846 |  |  |             if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3847 |  |  |                 print('\ncheck position {}:  src = "{}",  dest = "{}"'.format | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3848 |  |  |                       (j, src[i:], dest[:j])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3849 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3850 |  |  |             pos = alpha_pos[char] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3851 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3852 |  |  |             if pos >= 2: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3853 |  |  |                 xpos = pos-2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3854 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3855 |  |  |                 if i+1 == len(src): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3856 |  |  |                     pos = alpha_pos[''] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3857 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3858 |  |  |                     pos = alpha_pos[src[i+1]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3859 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3860 |  |  |                 start1 = phonet_hash_1[xpos, pos] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3861 |  |  |                 start2 = phonet_hash_1[xpos, 0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3862 |  |  |                 end1 = phonet_hash_2[xpos, pos] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3863 |  |  |                 end2 = phonet_hash_2[xpos, 0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3864 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3865 |  |  |                 # preserve rule priorities | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3866 |  |  |                 if (start2 >= 0) and ((start1 < 0) or (start2 < start1)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3867 |  |  |                     pos = start1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3868 |  |  |                     start1 = start2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3869 |  |  |                     start2 = pos | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3870 |  |  |                     pos = end1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3871 |  |  |                     end1 = end2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3872 |  |  |                     end2 = pos | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3873 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3874 |  |  |                 if (end1 >= start2) and (start2 >= 0): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3875 |  |  |                     if end2 > end1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3876 |  |  |                         end1 = end2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3877 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3878 |  |  |                     start2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3879 |  |  |                     end2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3880 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3881 |  |  |                 pos = phonet_hash[char] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3882 |  |  |                 start1 = pos | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3883 |  |  |                 end1 = 10000 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3884 |  |  |                 start2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3885 |  |  |                 end2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3886 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3887 |  |  |             pos = start1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3888 |  |  |             zeta0 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3889 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3890 |  |  |             if pos >= 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3891 |  |  |                 # check rules for this char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3892 |  |  |                 while ((_phonet_rules[pos] is None) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3893 |  |  |                        (_phonet_rules[pos][0] == char)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3894 |  |  |                     if pos > end1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3895 |  |  |                         if start2 > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3896 |  |  |                             pos = start2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3897 |  |  |                             start1 = start2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3898 |  |  |                             start2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3899 |  |  |                             end1 = end2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3900 |  |  |                             end2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3901 |  |  |                             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3902 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3903 |  |  |                         break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3904 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3905 |  |  |                     if (((_phonet_rules[pos] is None) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3906 |  |  |                          (_phonet_rules[pos + mode] is None))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3907 |  |  |                         # no conversion rule available | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3908 |  |  |                         pos += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3909 |  |  |                         continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3910 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3911 |  |  |                     if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3912 |  |  |                         _trinfo('> rule no.', pos, 'is being checked', lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3913 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3914 |  |  |                     # check whole string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3915 |  |  |                     matches = 1  # number of matching letters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3916 |  |  |                     priority = 5  # default priority | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3917 |  |  |                     rule = _phonet_rules[pos] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3918 |  |  |                     rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3919 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3920 |  |  |                     while (rule and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3921 |  |  |                            (len(src) > (i + matches)) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3922 |  |  |                            (src[i + matches] == rule[0]) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3923 |  |  |                            not rule[0].isdigit() and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3924 |  |  |                            (rule not in '(-<^$')): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3925 |  |  |                         matches += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3926 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3927 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3928 |  |  |                     if rule and (rule[0] == '('): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3929 |  |  |                         # check an array of letters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3930 |  |  |                         if (((len(src) > (i + matches)) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3931 |  |  |                              src[i + matches].isalpha() and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3932 |  |  |                              (src[i + matches] in rule[1:]))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3933 |  |  |                             matches += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3934 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3935 |  |  |                             while rule and rule[0] != ')': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3936 |  |  |                                 rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3937 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3938 |  |  |                             # if rule[0] == ')': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3939 |  |  |                             rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3940 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3941 |  |  |                     if rule: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3942 |  |  |                         priority0 = ord(rule[0]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3943 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3944 |  |  |                         priority0 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3945 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3946 |  |  |                     matches0 = matches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3947 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3948 |  |  |                     while rule and rule[0] == '-' and matches > 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3949 |  |  |                         matches -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3950 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3951 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3952 |  |  |                     if rule and rule[0] == '<': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3953 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3954 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3955 |  |  |                     if rule and rule[0].isdigit(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3956 |  |  |                         # read priority | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3957 |  |  |                         priority = int(rule[0]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3958 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3959 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3960 |  |  |                     if rule and rule[0:2] == '^^': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3961 |  |  |                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3962 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3963 |  |  |                     if (not rule or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3964 |  |  |                             ((rule[0] == '^') and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3965 |  |  |                              ((i == 0) or not src[i-1].isalpha()) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3966 |  |  |                              ((rule[1:2] != '$') or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3967 |  |  |                               (not (src[i+matches0:i+matches0+1].isalpha()) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3968 |  |  |                                (src[i+matches0:i+matches0+1] != '.')))) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3969 |  |  |                             ((rule[0] == '$') and (i > 0) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3970 |  |  |                              src[i-1].isalpha() and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3971 |  |  |                              ((not src[i+matches0:i+matches0+1].isalpha()) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3972 |  |  |                               (src[i+matches0:i+matches0+1] != '.')))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3973 |  |  |                         # look for continuation, if: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3974 |  |  |                         # matches > 1 and NO '-' in first string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3975 |  |  |                         pos0 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3976 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3977 |  |  |                         start3 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3978 |  |  |                         start4 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3979 |  |  |                         end3 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3980 |  |  |                         end4 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3981 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3982 |  |  |                         if (((matches > 1) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3983 |  |  |                              src[i+matches:i+matches+1] and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3984 |  |  |                              (priority0 != ord('-')))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3985 |  |  |                             char0 = src[i+matches-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3986 |  |  |                             pos0 = alpha_pos[char0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3987 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3988 |  |  |                             if pos0 >= 2 and src[i+matches]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3989 |  |  |                                 xpos = pos0 - 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3990 |  |  |                                 pos0 = alpha_pos[src[i+matches]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3991 |  |  |                                 start3 = phonet_hash_1[xpos, pos0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3992 |  |  |                                 start4 = phonet_hash_1[xpos, 0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3993 |  |  |                                 end3 = phonet_hash_2[xpos, pos0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3994 |  |  |                                 end4 = phonet_hash_2[xpos, 0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3995 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3996 |  |  |                                 # preserve rule priorities | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3997 |  |  |                                 if (((start4 >= 0) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3998 |  |  |                                      ((start3 < 0) or (start4 < start3)))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3999 |  |  |                                     pos0 = start3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4000 |  |  |                                     start3 = start4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4001 |  |  |                                     start4 = pos0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4002 |  |  |                                     pos0 = end3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4003 |  |  |                                     end3 = end4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4004 |  |  |                                     end4 = pos0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4005 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4006 |  |  |                                 if (end3 >= start4) and (start4 >= 0): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4007 |  |  |                                     if end4 > end3: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4008 |  |  |                                         end3 = end4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4009 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4010 |  |  |                                     start4 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4011 |  |  |                                     end4 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4012 |  |  |                             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4013 |  |  |                                 pos0 = phonet_hash[char0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4014 |  |  |                                 start3 = pos0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4015 |  |  |                                 end3 = 10000 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4016 |  |  |                                 start4 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4017 |  |  |                                 end4 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4018 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4019 |  |  |                             pos0 = start3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4020 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4021 |  |  |                         # check continuation rules for src[i+matches] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4022 |  |  |                         if pos0 >= 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4023 |  |  |                             while ((_phonet_rules[pos0] is None) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4024 |  |  |                                    (_phonet_rules[pos0][0] == char0)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4025 |  |  |                                 if pos0 > end3: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4026 |  |  |                                     if start4 > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4027 |  |  |                                         pos0 = start4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4028 |  |  |                                         start3 = start4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4029 |  |  |                                         start4 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4030 |  |  |                                         end3 = end4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4031 |  |  |                                         end4 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4032 |  |  |                                         continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4033 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4034 |  |  |                                     priority0 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4035 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4036 |  |  |                                     # important | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4037 |  |  |                                     break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4038 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4039 |  |  |                                 if (((_phonet_rules[pos0] is None) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4040 |  |  |                                      (_phonet_rules[pos0 + mode] is None))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4041 |  |  |                                     # no conversion rule available | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4042 |  |  |                                     pos0 += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4043 |  |  |                                     continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4044 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4045 |  |  |                                 if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4046 |  |  |                                     _trinfo('> > continuation rule no.', pos0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4047 |  |  |                                             'is being checked', lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4048 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4049 |  |  |                                 # check whole string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4050 |  |  |                                 matches0 = matches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4051 |  |  |                                 priority0 = 5 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4052 |  |  |                                 rule = _phonet_rules[pos0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4053 |  |  |                                 rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4054 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4055 |  |  |                                 while (rule and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4056 |  |  |                                        (src[i+matches0:i+matches0+1] == | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4057 |  |  |                                         rule[0]) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4058 |  |  |                                        (not rule[0].isdigit() or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4059 |  |  |                                         (rule in '(-<^$'))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4060 |  |  |                                     matches0 += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4061 |  |  |                                     rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4062 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4063 |  |  |                                 if rule and rule[0] == '(': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4064 |  |  |                                     # check an array of letters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4065 |  |  |                                     if ((src[i+matches0:i+matches0+1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4066 |  |  |                                          .isalpha() and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4067 |  |  |                                          (src[i+matches0] in rule[1:]))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4068 |  |  |                                         matches0 += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4069 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4070 |  |  |                                         while rule and rule[0] != ')': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4071 |  |  |                                             rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4072 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4073 |  |  |                                         # if rule[0] == ')': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4074 |  |  |                                         rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4075 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4076 |  |  |                                 while rule and rule[0] == '-': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4077 |  |  |                                     # "matches0" is NOT decremented | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4078 |  |  |                                     # because of  "if (matches0 == matches)" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4079 |  |  |                                     rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4080 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4081 |  |  |                                 if rule and rule[0] == '<': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4082 |  |  |                                     rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4083 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4084 |  |  |                                 if rule and rule[0].isdigit(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4085 |  |  |                                     priority0 = int(rule[0]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4086 |  |  |                                     rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4087 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4088 |  |  |                                 if (not rule or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4089 |  |  |                                         # rule == '^' is not possible here | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4090 |  |  |                                         ((rule[0] == '$') and not | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4091 |  |  |                                          src[i+matches0:i+matches0+1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4092 |  |  |                                          .isalpha() and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4093 |  |  |                                          (src[i+matches0:i+matches0+1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4094 |  |  |                                           != '.'))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4095 |  |  |                                     if matches0 == matches: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4096 |  |  |                                         # this is only a partial string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4097 |  |  |                                         if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4098 |  |  |                                             _trinfo('> > continuation ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4099 |  |  |                                                     'rule no.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4100 |  |  |                                                     pos0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4101 |  |  |                                                     'not used (too short)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4102 |  |  |                                                     lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4103 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4104 |  |  |                                         pos0 += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4105 |  |  |                                         continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4106 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4107 |  |  |                                     if priority0 < priority: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4108 |  |  |                                         # priority is too low | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4109 |  |  |                                         if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4110 |  |  |                                             _trinfo('> > continuation ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4111 |  |  |                                                     'rule no.', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4112 |  |  |                                                     pos0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4113 |  |  |                                                     'not used (priority)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4114 |  |  |                                                     lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4115 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4116 |  |  |                                         pos0 += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4117 |  |  |                                         continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4118 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4119 |  |  |                                     # continuation rule found | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4120 |  |  |                                     break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4121 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4122 |  |  |                                 if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4123 |  |  |                                     _trinfo('> > continuation rule no.', pos0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4124 |  |  |                                             'not used', lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4125 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4126 |  |  |                                 pos0 += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4127 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4128 |  |  |                             # end of "while" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4129 |  |  |                             if ((priority0 >= priority) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4130 |  |  |                                     ((_phonet_rules[pos0] is not None) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4131 |  |  |                                      (_phonet_rules[pos0][0] == char0))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4133 |  |  |                                 if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4134 |  |  |                                     _trinfo('> rule no.', pos, '', lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4135 |  |  |                                     _trinfo('> not used because of ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4136 |  |  |                                             'continuation', pos0, '', lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4137 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4138 |  |  |                                 pos += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4139 |  |  |                                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4140 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4141 |  |  |                         # replace string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4142 |  |  |                         if trace: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4143 |  |  |                             _trinfo('Rule no.', pos, 'is applied', lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4144 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4145 |  |  |                         if ((_phonet_rules[pos] and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4146 |  |  |                              ('<' in _phonet_rules[pos][1:]))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4147 |  |  |                             priority0 = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4148 |  |  |                         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4149 |  |  |                             priority0 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4150 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4151 |  |  |                         rule = _phonet_rules[pos + mode] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4152 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4153 |  |  |                         if (priority0 == 1) and (zeta == 0): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4154 |  |  |                             # rule with '<' is applied | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4155 |  |  |                             if ((j > 0) and rule and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4156 |  |  |                                     ((dest[j-1] == char) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4157 |  |  |                                      (dest[j-1] == rule[0]))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4158 |  |  |                                 j -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4159 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4160 |  |  |                             zeta0 = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4161 |  |  |                             zeta += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4162 |  |  |                             matches0 = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4163 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4164 |  |  |                             while rule and src[i+matches0]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4165 |  |  |                                 src = (src[0:i+matches0] + rule[0] + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4166 |  |  |                                        src[i+matches0+1:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4167 |  |  |                                 matches0 += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4168 |  |  |                                 rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4169 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4170 |  |  |                             if matches0 < matches: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4171 |  |  |                                 src = (src[0:i+matches0] + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4172 |  |  |                                        src[i+matches:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4173 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4174 |  |  |                             char = src[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4175 |  |  |                         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4176 |  |  |                             i = i + matches - 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4177 |  |  |                             zeta = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4178 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4179 |  |  |                             while len(rule) > 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4180 |  |  |                                 if (j == 0) or (dest[j - 1] != rule[0]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4181 |  |  |                                     dest = (dest[0:j] + rule[0] + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4182 |  |  |                                             dest[min(len(dest), j+1):]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4183 |  |  |                                     j += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4184 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4185 |  |  |                                 rule = rule[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4186 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4187 |  |  |                             # new "current char" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4188 |  |  |                             if not rule: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4189 |  |  |                                 rule = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4190 |  |  |                                 char = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4191 |  |  |                             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4192 |  |  |                                 char = rule[0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4193 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4194 |  |  |                             if ((_phonet_rules[pos] and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4195 |  |  |                                  '^^' in _phonet_rules[pos][1:])): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4196 |  |  |                                 if char:  # pragma: no branch | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4197 |  |  |                                     dest = (dest[0:j] + char + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4198 |  |  |                                             dest[min(len(dest), j + 1):]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4199 |  |  |                                     j += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4200 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4201 |  |  |                                 src = src[i + 1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4202 |  |  |                                 i = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4203 |  |  |                                 zeta0 = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4204 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4205 |  |  |                         break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4206 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4207 |  |  |                     pos += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4208 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4209 |  |  |                     if pos > end1 and start2 > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4210 |  |  |                         pos = start2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4211 |  |  |                         start1 = start2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4212 |  |  |                         end1 = end2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4213 |  |  |                         start2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4214 |  |  |                         end2 = -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4215 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4216 |  |  |             if zeta0 == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4217 |  |  |                 if char and ((j == 0) or (dest[j-1] != char)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4218 |  |  |                     # delete multiple letters only | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4219 |  |  |                     dest = dest[0:j] + char + dest[min(j+1, term_length):] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4220 |  |  |                     j += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4221 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4222 |  |  |                 i += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4223 |  |  |                 zeta = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4224 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4225 |  |  |         dest = dest[0:j] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4226 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4227 |  |  |         return dest | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4228 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4229 |  |  |     _initialize_phonet(lang) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4230 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4231 |  |  |     word = normalize('NFKC', text_type(word)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4232 |  |  |     return _phonet(word, mode, lang, trace) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4233 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4234 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4235 |  |  | def spfc(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4236 |  |  |     """Return the Standardized Phonetic Frequency Code (SPFC) of a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4237 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4238 |  |  |     Standardized Phonetic Frequency Code is roughly Soundex-like. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4239 |  |  |     This implementation is based on page 19-21 of :cite:`Moore:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4240 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4241 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4242 |  |  |     :returns: the SPFC value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4243 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4244 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4245 |  |  |     >>> spfc('Christopher Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4246 |  |  |     '01160' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4247 |  |  |     >>> spfc('Christopher Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4248 |  |  |     '01160' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4249 |  |  |     >>> spfc('Niall Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4250 |  |  |     '01660' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4251 |  |  |     >>> spfc('Niall Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4252 |  |  |     '01660' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4253 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4254 |  |  |     >>> spfc('L.Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4255 |  |  |     '01960' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4256 |  |  |     >>> spfc('R.Miller') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4257 |  |  |     '65490' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4258 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4259 |  |  |     >>> spfc(('L', 'Smith')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4260 |  |  |     '01960' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4261 |  |  |     >>> spfc(('R', 'Miller')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4262 |  |  |     '65490' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4263 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4264 |  |  |     _pf1 = dict(zip((ord(_) for _ in 'SZCKQVFPUWABLORDHIEMNXGJT'), | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4265 |  |  |                     '0011112222334445556666777')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4266 |  |  |     _pf2 = dict(zip((ord(_) for _ in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4267 |  |  |                      'SZCKQFPXABORDHIMNGJTUVWEL'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4268 |  |  |                     '0011122233445556677788899')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4269 |  |  |     _pf3 = dict(zip((ord(_) for _ in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4270 |  |  |                      'BCKQVDTFLPGJXMNRSZAEHIOUWY'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4271 |  |  |                     '00000112223334456677777777')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4272 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4273 |  |  |     _substitutions = (('DK', 'K'), ('DT', 'T'), ('SC', 'S'), ('KN', 'N'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4274 |  |  |                       ('MN', 'N')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4275 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4276 |  |  |     def _raise_word_ex(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4277 |  |  |         """Raise an AttributeError.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4278 |  |  |         raise AttributeError('word attribute must be a string with a space ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4279 |  |  |                              'or period dividing the first and last names ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4280 |  |  |                              'or a tuple/list consisting of the first and ' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4281 |  |  |                              'last names') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4282 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4283 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4284 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4285 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4286 |  |  |     if isinstance(word, (str, text_type)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4287 |  |  |         names = word.split('.', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4288 |  |  |         if len(names) != 2: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4289 |  |  |             names = word.split(' ', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4290 |  |  |             if len(names) != 2: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4291 |  |  |                 _raise_word_ex() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4292 |  |  |     elif hasattr(word, '__iter__'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4293 |  |  |         if len(word) != 2: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4294 |  |  |             _raise_word_ex() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4295 |  |  |         names = word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4296 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4297 |  |  |         _raise_word_ex() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4298 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4299 |  |  |     names = [normalize('NFKD', text_type(_.strip() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4300 |  |  |                                          .replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4301 |  |  |                                          .upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4302 |  |  |              for _ in names] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4303 |  |  |     code = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4304 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def steps_one_to_three(name):
                    """Run SPFC steps 1 through 3 on a single name field.

                    :param str name: one name field (the caller passes upper-cased,
                        NFKD-normalized text)
                    :returns: the reduced name field
                    :rtype: str
                    """
                    capitals = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
                    dropped = frozenset('AEHIOUWY')

                    # Discard every character that is not a plain capital letter A-Z.
                    name = ''.join(char for char in name if char in capitals)

                    # Step 1: apply each rewrite pair from _substitutions, in order
                    # (documented upstream as DK->K, DT->T, SC->S, KN->N, MN->N).
                    for pair in _substitutions:
                        name = name.replace(pair[0], pair[1])

                    # Step 2: replace multiple letters with a single letter.
                    name = _delete_consecutive_repeats(name)

                    # Step 3: the first letter always survives; vowels, H, W and Y
                    # are removed from the remainder of the field.
                    if not name:
                        return name
                    head, tail = name[0], name[1:]
                    return head + ''.join(char for char in tail if char not in dropped)
            
                                                                                                            
                            
            
                                    
            
            
                | 4327 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4328 |  |  |     names = [steps_one_to_three(_) for _ in names] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4329 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4330 |  |  |     # 4. The first digit of the code is obtained using PF1 and the first letter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4331 |  |  |     # of the name field. Remove this letter after coding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4332 |  |  |     if names[1]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4333 |  |  |         code += names[1][0].translate(_pf1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4334 |  |  |         names[1] = names[1][1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4335 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4336 |  |  |     # 5. Using the last letters of the name, use Table PF3 to obtain the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4337 |  |  |     # second digit of the code. Use as many letters as possible and remove | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4338 |  |  |     # after coding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4339 |  |  |     if names[1]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4340 |  |  |         if names[1][-3:] == 'STN' or names[1][-3:] == 'PRS': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4341 |  |  |             code += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4342 |  |  |             names[1] = names[1][:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4343 |  |  |         elif names[1][-2:] == 'SN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4344 |  |  |             code += '8' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4345 |  |  |             names[1] = names[1][:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4346 |  |  |         elif names[1][-3:] == 'STR': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4347 |  |  |             code += '9' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4348 |  |  |             names[1] = names[1][:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4349 |  |  |         elif names[1][-2:] in {'SR', 'TN', 'TD'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4350 |  |  |             code += '9' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4351 |  |  |             names[1] = names[1][:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4352 |  |  |         elif names[1][-3:] == 'DRS': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4353 |  |  |             code += '7' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4354 |  |  |             names[1] = names[1][:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4355 |  |  |         elif names[1][-2:] in {'TR', 'MN'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4356 |  |  |             code += '7' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4357 |  |  |             names[1] = names[1][:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4358 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4359 |  |  |             code += names[1][-1].translate(_pf3) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4360 |  |  |             names[1] = names[1][:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4361 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4362 |  |  |     # 6. The third digit is found using Table PF2 and the first character of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4363 |  |  |     # the first name. Remove after coding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4364 |  |  |     if names[0]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4365 |  |  |         code += names[0][0].translate(_pf2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4366 |  |  |         names[0] = names[0][1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4367 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4368 |  |  |     # 7. The fourth digit is found using Table PF2 and the first character of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4369 |  |  |     # the name field. If no letters remain use zero. After coding remove the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4370 |  |  |     # letter. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4371 |  |  |     # 8. The fifth digit is found in the same manner as the fourth using the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4372 |  |  |     # remaining characters of the name field if any. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4373 |  |  |     for _ in range(2): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4374 |  |  |         if names[1]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4375 |  |  |             code += names[1][0].translate(_pf2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4376 |  |  |             names[1] = names[1][1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4377 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4378 |  |  |             code += '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4379 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4380 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4381 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4382 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4383 |  |  | def statistics_canada(word, maxlength=4): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4384 |  |  |     """Return the Statistics Canada code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4385 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4386 |  |  |     The original description of this algorithm could not be located, and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4387 |  |  |     may only have been specified in an unpublished TR. The coding does not | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4388 |  |  |     appear to be in use by Statistics Canada any longer. In its place, this is | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4389 |  |  |     an implementation of the "Census modified Statistics Canada name coding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4390 |  |  |     procedure". | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4391 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4392 |  |  |     The modified version of this algorithm is described in Appendix B of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4393 |  |  |      :cite:`Moore:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4394 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4395 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4396 |  |  |     :param int maxlength: the maximum length (default 4) of the code to return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4397 |  |  |     :param bool modified: indicates whether to use USDA modified algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4398 |  |  |     :returns: the Statistics Canada name code value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4399 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4400 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4401 |  |  |     >>> statistics_canada('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4402 |  |  |     'CHRS' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4403 |  |  |     >>> statistics_canada('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4404 |  |  |     'NL' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4405 |  |  |     >>> statistics_canada('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4406 |  |  |     'SMTH' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4407 |  |  |     >>> statistics_canada('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4408 |  |  |     'SCHM' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4409 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4410 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z out | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4411 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4412 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4413 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4414 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4415 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4416 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4417 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4418 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4419 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4420 |  |  |     code = word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4421 |  |  |     for vowel in {'A', 'E', 'I', 'O', 'U', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4422 |  |  |         code = code.replace(vowel, '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4423 |  |  |     code = word[0]+code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4424 |  |  |     code = _delete_consecutive_repeats(code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4425 |  |  |     code = code.replace(' ', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4426 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4427 |  |  |     return code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4428 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4429 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4430 |  |  | def lein(word, maxlength=4, zero_pad=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4431 |  |  |     """Return the Lein code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4432 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4433 |  |  |     This is Lein name coding, described in :cite:`Moore:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4434 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4435 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4436 |  |  |     :param int maxlength: the maximum length (default 4) of the code to return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4437 |  |  |     :param bool zero_pad: pad the end of the return value with 0s to achieve a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4438 |  |  |         maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4439 |  |  |     :returns: the Lein code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4440 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4441 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4442 |  |  |     >>> lein('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4443 |  |  |     'C351' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4444 |  |  |     >>> lein('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4445 |  |  |     'N300' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4446 |  |  |     >>> lein('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4447 |  |  |     'S210' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4448 |  |  |     >>> lein('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4449 |  |  |     'S521' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4450 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4451 |  |  |     _lein_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4452 |  |  |                                   'BCDFGJKLMNPQRSTVXZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4453 |  |  |                                  '451455532245351455')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4454 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4455 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z out | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4456 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4457 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4458 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4459 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4460 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4461 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4462 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4463 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4464 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4465 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4466 |  |  |     code = word[0]  # Rule 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4467 |  |  |     word = word[1:].translate({32: None, 65: None, 69: None, 72: None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4468 |  |  |                                73: None, 79: None, 85: None, 87: None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4469 |  |  |                                89: None})  # Rule 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4470 |  |  |     word = _delete_consecutive_repeats(word)  # Rule 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4471 |  |  |     code += word.translate(_lein_translation)  # Rule 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4472 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4473 |  |  |     if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4474 |  |  |         code += ('0'*maxlength)  # Rule 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4475 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4476 |  |  |     return code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4477 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4478 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4479 |  |  | def roger_root(word, maxlength=5, zero_pad=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4480 |  |  |     """Return the Roger Root code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4481 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4482 |  |  |     This is Roger Root name coding, described in :cite:`Moore:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4483 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4484 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4485 |  |  |     :param int maxlength: the maximum length (default 5) of the code to return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4486 |  |  |     :param bool zero_pad: pad the end of the return value with 0s to achieve a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4487 |  |  |         maxlength string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4488 |  |  |     :returns: the Roger Root code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4489 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4490 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4491 |  |  |     >>> roger_root('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4492 |  |  |     '06401' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4493 |  |  |     >>> roger_root('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4494 |  |  |     '02500' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4495 |  |  |     >>> roger_root('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4496 |  |  |     '00310' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4497 |  |  |     >>> roger_root('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4498 |  |  |     '06310' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4499 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4500 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z out | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4501 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4502 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4503 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4504 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4505 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4506 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4507 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4508 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4509 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4510 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4511 |  |  |     # '*' is used to prevent combining by _delete_consecutive_repeats() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4512 |  |  |     _init_patterns = {4: {'TSCH': '06'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4513 |  |  |                       3: {'TSH': '06', 'SCH': '06'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4514 |  |  |                       2: {'CE': '0*0', 'CH': '06', 'CI': '0*0', 'CY': '0*0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4515 |  |  |                           'DG': '07', 'GF': '08', 'GM': '03', 'GN': '02', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4516 |  |  |                           'KN': '02', 'PF': '08', 'PH': '08', 'PN': '02', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4517 |  |  |                           'SH': '06', 'TS': '0*0', 'WR': '04'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4518 |  |  |                       1: {'A': '1', 'B': '09', 'C': '07', 'D': '01', 'E': '1', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4519 |  |  |                           'F': '08', 'G': '07', 'H': '2', 'I': '1', 'J': '3', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4520 |  |  |                           'K': '07', 'L': '05', 'M': '03', 'N': '02', 'O': '1', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4521 |  |  |                           'P': '09', 'Q': '07', 'R': '04', 'S': '0*0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4522 |  |  |                           'T': '01', 'U': '1', 'V': '08', 'W': '4', 'X': '07', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4523 |  |  |                           'Y': '5', 'Z': '0*0'}} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4524 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4525 |  |  |     _med_patterns = {4: {'TSCH': '6'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4526 |  |  |                      3: {'TSH': '6', 'SCH': '6'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4527 |  |  |                      2: {'CE': '0', 'CH': '6', 'CI': '0', 'CY': '0', 'DG': '7', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4528 |  |  |                          'PH': '8', 'SH': '6', 'TS': '0'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4529 |  |  |                      1: {'B': '9', 'C': '7', 'D': '1', 'F': '8', 'G': '7', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4530 |  |  |                          'J': '6', 'K': '7', 'L': '5', 'M': '3', 'N': '2', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4531 |  |  |                          'P': '9', 'Q': '7', 'R': '4', 'S': '0', 'T': '1', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4532 |  |  |                          'V': '8', 'X': '7', 'Z': '0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4533 |  |  |                          'A': '*', 'E': '*', 'H': '*', 'I': '*', 'O': '*', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4534 |  |  |                          'U': '*', 'W': '*', 'Y': '*'}} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4535 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4536 |  |  |     code = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4537 |  |  |     pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4538 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4539 |  |  |     # Do first digit(s) first | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4540 |  |  |     for num in range(4, 0, -1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4541 |  |  |         if word[:num] in _init_patterns[num]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4542 |  |  |             code = _init_patterns[num][word[:num]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4543 |  |  |             pos += num | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4544 |  |  |             break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4545 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4546 |  |  |         pos += 1  # Advance if nothing is recognized | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4547 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4548 |  |  |     # Then code subsequent digits | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4549 |  |  |     while pos < len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4550 |  |  |         for num in range(4, 0, -1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4551 |  |  |             if word[pos:pos+num] in _med_patterns[num]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4552 |  |  |                 code += _med_patterns[num][word[pos:pos+num]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4553 |  |  |                 pos += num | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4554 |  |  |                 break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4555 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4556 |  |  |             pos += 1  # Advance if nothing is recognized | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4557 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4558 |  |  |     code = _delete_consecutive_repeats(code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4559 |  |  |     code = code.replace('*', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4560 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4561 |  |  |     if zero_pad: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4562 |  |  |         code += '0'*maxlength | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4563 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4564 |  |  |     return code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4565 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4566 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def onca(word, maxlength=4, zero_pad=True):
                    """Return the Oxford Name Compression Algorithm (ONCA) code for a word.

                    ONCA is based on :cite:`Gill:1997`. This implementation computes it in
                    two stages: the word is first encoded with NYSIIS, and the NYSIIS
                    result is then encoded with Soundex.

                    The source calls for an "anglicised version of the NYSIIS method" as the
                    first stage, but no complete description of that variant could be
                    found. The standard NYSIIS algorithm is used instead, so results are
                    likely not precisely those of the original ONCA.

                    :param str word: the word to transform
                    :param int maxlength: the maximum length (default 4) of the code to return
                    :param bool zero_pad: pad the end of the return value with 0s to achieve a
                        maxlength string
                    :returns: the ONCA code
                    :rtype: str

                    >>> onca('Christopher')
                    'C623'
                    >>> onca('Niall')
                    'N400'
                    >>> onca('Smith')
                    'S530'
                    >>> onca('Schmidt')
                    'S530'
                    """
                    # In the most extreme case, three characters of NYSIIS output collapse
                    # into a single character of the final code, so the intermediate NYSIIS
                    # code is requested at three times maxlength.
                    nysiis_code = nysiis(word, maxlength=maxlength*3)
                    return soundex(nysiis_code, maxlength, zero_pad=zero_pad)
            
                                                                                                            
                            
            
                                    
            
            
                | 4597 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4598 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4599 |  |  | def eudex(word, maxlength=8): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4600 |  |  |     """Return the eudex phonetic hash of a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4601 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4602 |  |  |     This implementation of eudex phonetic hashing is based on the specification | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4603 |  |  |     (not the reference implementation) at :cite:`Ticki:2016`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4604 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4605 |  |  |     Further details can be found at :cite:`Ticki:2016b`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4606 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4607 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4608 |  |  |     :param int maxlength: the length of the code returned (defaults to 8) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4609 |  |  |     :returns: the eudex hash | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4610 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4611 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4612 |  |  |     _trailing_phones = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4613 |  |  |         'a': 0,  # a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4614 |  |  |         'b': 0b01001000,  # b | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4615 |  |  |         'c': 0b00001100,  # c | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4616 |  |  |         'd': 0b00011000,  # d | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4617 |  |  |         'e': 0,  # e | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4618 |  |  |         'f': 0b01000100,  # f | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4619 |  |  |         'g': 0b00001000,  # g | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4620 |  |  |         'h': 0b00000100,  # h | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4621 |  |  |         'i': 1,  # i | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4622 |  |  |         'j': 0b00000101,  # j | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4623 |  |  |         'k': 0b00001001,  # k | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4624 |  |  |         'l': 0b10100000,  # l | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4625 |  |  |         'm': 0b00000010,  # m | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4626 |  |  |         'n': 0b00010010,  # n | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4627 |  |  |         'o': 0,  # o | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4628 |  |  |         'p': 0b01001001,  # p | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4629 |  |  |         'q': 0b10101000,  # q | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4630 |  |  |         'r': 0b10100001,  # r | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4631 |  |  |         's': 0b00010100,  # s | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4632 |  |  |         't': 0b00011101,  # t | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4633 |  |  |         'u': 1,  # u | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4634 |  |  |         'v': 0b01000101,  # v | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4635 |  |  |         'w': 0b00000000,  # w | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4636 |  |  |         'x': 0b10000100,  # x | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4637 |  |  |         'y': 1,  # y | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4638 |  |  |         'z': 0b10010100,  # z | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4639 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4640 |  |  |         'ß': 0b00010101,  # ß | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4641 |  |  |         'à': 0,  # à | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4642 |  |  |         'á': 0,  # á | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4643 |  |  |         'â': 0,  # â | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4644 |  |  |         'ã': 0,  # ã | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4645 |  |  |         'ä': 0,  # ä[æ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4646 |  |  |         'å': 1,  # å[oː] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4647 |  |  |         'æ': 0,  # æ[æ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4648 |  |  |         'ç': 0b10010101,  # ç[t͡ʃ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4649 |  |  |         'è': 1,  # è | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4650 |  |  |         'é': 1,  # é | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4651 |  |  |         'ê': 1,  # ê | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4652 |  |  |         'ë': 1,  # ë | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4653 |  |  |         'ì': 1,  # ì | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4654 |  |  |         'í': 1,  # í | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4655 |  |  |         'î': 1,  # î | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4656 |  |  |         'ï': 1,  # ï | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4657 |  |  |         'ð': 0b00010101,  # ð[ð̠](represented as a non-plosive T) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4658 |  |  |         'ñ': 0b00010111,  # ñ[nj](represented as a combination of n and j) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4659 |  |  |         'ò': 0,  # ò | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4660 |  |  |         'ó': 0,  # ó | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4661 |  |  |         'ô': 0,  # ô | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4662 |  |  |         'õ': 0,  # õ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4663 |  |  |         'ö': 1,  # ö[ø] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4664 |  |  |         '÷': 0b11111111,  # ÷ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4665 |  |  |         'ø': 1,  # ø[ø] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4666 |  |  |         'ù': 1,  # ù | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4667 |  |  |         'ú': 1,  # ú | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4668 |  |  |         'û': 1,  # û | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4669 |  |  |         'ü': 1,  # ü | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4670 |  |  |         'ý': 1,  # ý | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4671 |  |  |         'þ': 0b00010101,  # þ[ð̠](represented as a non-plosive T) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4672 |  |  |         'ÿ': 1,  # ÿ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4673 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4674 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4675 |  |  |     _initial_phones = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4676 |  |  |         'a': 0b10000100,  # a* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4677 |  |  |         'b': 0b00100100,  # b | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4678 |  |  |         'c': 0b00000110,  # c | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4679 |  |  |         'd': 0b00001100,  # d | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4680 |  |  |         'e': 0b11011000,  # e* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4681 |  |  |         'f': 0b00100010,  # f | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4682 |  |  |         'g': 0b00000100,  # g | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4683 |  |  |         'h': 0b00000010,  # h | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4684 |  |  |         'i': 0b11111000,  # i* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4685 |  |  |         'j': 0b00000011,  # j | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4686 |  |  |         'k': 0b00000101,  # k | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4687 |  |  |         'l': 0b01010000,  # l | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4688 |  |  |         'm': 0b00000001,  # m | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4689 |  |  |         'n': 0b00001001,  # n | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4690 |  |  |         'o': 0b10010100,  # o* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4691 |  |  |         'p': 0b00100101,  # p | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4692 |  |  |         'q': 0b01010100,  # q | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4693 |  |  |         'r': 0b01010001,  # r | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4694 |  |  |         's': 0b00001010,  # s | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4695 |  |  |         't': 0b00001110,  # t | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4696 |  |  |         'u': 0b11100000,  # u* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4697 |  |  |         'v': 0b00100011,  # v | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4698 |  |  |         'w': 0b00000000,  # w | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4699 |  |  |         'x': 0b01000010,  # x | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4700 |  |  |         'y': 0b11100100,  # y* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4701 |  |  |         'z': 0b01001010,  # z | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4702 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4703 |  |  |         'ß': 0b00001011,  # ß | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4704 |  |  |         'à': 0b10000101,  # à | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4705 |  |  |         'á': 0b10000101,  # á | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4706 |  |  |         'â': 0b10000000,  # â | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4707 |  |  |         'ã': 0b10000110,  # ã | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4708 |  |  |         'ä': 0b10100110,  # ä [æ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4709 |  |  |         'å': 0b11000010,  # å [oː] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4710 |  |  |         'æ': 0b10100111,  # æ [æ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4711 |  |  |         'ç': 0b01010100,  # ç [t͡ʃ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4712 |  |  |         'è': 0b11011001,  # è | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4713 |  |  |         'é': 0b11011001,  # é | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4714 |  |  |         'ê': 0b11011001,  # ê | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4715 |  |  |         'ë': 0b11000110,  # ë [ə] or [œ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4716 |  |  |         'ì': 0b11111001,  # ì | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4717 |  |  |         'í': 0b11111001,  # í | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4718 |  |  |         'î': 0b11111001,  # î | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4719 |  |  |         'ï': 0b11111001,  # ï | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4720 |  |  |         'ð': 0b00001011,  # ð [ð̠] (represented as a non-plosive T) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4721 |  |  |         'ñ': 0b00001011,  # ñ [nj] (represented as a combination of n and j) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4722 |  |  |         'ò': 0b10010101,  # ò | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4723 |  |  |         'ó': 0b10010101,  # ó | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4724 |  |  |         'ô': 0b10010101,  # ô | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4725 |  |  |         'õ': 0b10010101,  # õ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4726 |  |  |         'ö': 0b11011100,  # ö [œ] or [ø] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4727 |  |  |         '÷': 0b11111111,  # ÷ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4728 |  |  |         'ø': 0b11011101,  # ø [œ] or [ø] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4729 |  |  |         'ù': 0b11100001,  # ù | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4730 |  |  |         'ú': 0b11100001,  # ú | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4731 |  |  |         'û': 0b11100001,  # û | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4732 |  |  |         'ü': 0b11100101,  # ü | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4733 |  |  |         'ý': 0b11100101,  # ý | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4734 |  |  |         'þ': 0b00001011,  # þ [ð̠] (represented as a non-plosive T) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4735 |  |  |         'ÿ': 0b11100101,  # ÿ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4736 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4737 |  |  |     # Lowercase input & filter unknown characters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4738 |  |  |     word = ''.join(char for char in word.lower() if char in _initial_phones) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4739 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4740 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4741 |  |  |         word = '÷' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4742 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4743 |  |  |     # Perform initial eudex coding of each character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4744 |  |  |     values = [_initial_phones[word[0]]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4745 |  |  |     values += [_trailing_phones[char] for char in word[1:]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4746 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4747 |  |  |     # Right-shift by one to determine if second instance should be skipped | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4748 |  |  |     shifted_values = [_ >> 1 for _ in values] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4749 |  |  |     condensed_values = [values[0]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4750 |  |  |     for n in range(1, len(shifted_values)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4751 |  |  |         if shifted_values[n] != shifted_values[n-1]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4752 |  |  |             condensed_values.append(values[n]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4753 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4754 |  |  |     # Add padding after first character & trim beyond maxlength | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4755 |  |  |     values = ([condensed_values[0]] + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4756 |  |  |               [0]*max(0, maxlength - len(condensed_values)) + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4757 |  |  |               condensed_values[1:maxlength]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4758 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4759 |  |  |     # Combine individual character values into eudex hash | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4760 |  |  |     hash_value = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4761 |  |  |     for val in values: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4762 |  |  |         hash_value = (hash_value << 8) | val | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4763 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4764 |  |  |     return hash_value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4765 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4766 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4767 |  |  | def haase_phonetik(word, primary_only=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4768 |  |  |     """Return the Haase Phonetik (numeric output) code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4769 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4770 |  |  |     Based on the algorithm described at :cite:`Prante:2015`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4771 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4772 |  |  |     Based on the original :cite:`Haase:2000`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4773 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4774 |  |  |     While the output code is numeric, it is nevertheless a str. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4775 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4776 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4777 |  |  |     :returns: the Haase Phonetik value as a numeric string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4778 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4779 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4780 |  |  |     def _after(word, i, letters): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4781 |  |  |         """Return True if word[i] follows one of the supplied letters.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4782 |  |  |         if i > 0 and word[i-1] in letters: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4783 |  |  |             return True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4784 |  |  |         return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4785 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4786 |  |  |     def _before(word, i, letters): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4787 |  |  |         """Return True if word[i] precedes one of the supplied letters.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4788 |  |  |         if i+1 < len(word) and word[i+1] in letters: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4789 |  |  |             return True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4790 |  |  |         return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4791 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4792 |  |  |     _vowels = {'A', 'E', 'I', 'J', 'O', 'U', 'Y'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4793 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4794 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4795 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4796 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4797 |  |  |     word = word.replace('Ä', 'AE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4798 |  |  |     word = word.replace('Ö', 'OE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4799 |  |  |     word = word.replace('Ü', 'UE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4800 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4801 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4802 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4803 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4804 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4805 |  |  |     # Nothing to convert, return base case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4806 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4807 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4808 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4809 |  |  |     variants = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4810 |  |  |     if primary_only: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4811 |  |  |         variants = [word] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4812 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4813 |  |  |         pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4814 |  |  |         if word[:2] == 'CH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4815 |  |  |             variants.append(('CH', 'SCH')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4816 |  |  |             pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4817 |  |  |         len_3_vars = {'OWN': 'AUN', 'WSK': 'RSK', 'SCH': 'CH', 'GLI': 'LI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4818 |  |  |                       'AUX': 'O', 'EUX': 'O'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4819 |  |  |         while pos < len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4820 |  |  |             if word[pos:pos+4] == 'ILLE': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4821 |  |  |                 variants.append(('ILLE', 'I')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4822 |  |  |                 pos += 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4823 |  |  |             elif word[pos:pos+3] in len_3_vars: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4824 |  |  |                 variants.append((word[pos:pos+3], len_3_vars[word[pos:pos+3]])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4825 |  |  |                 pos += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4826 |  |  |             elif word[pos:pos+2] == 'RB': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4827 |  |  |                 variants.append(('RB', 'RW')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4828 |  |  |                 pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4829 |  |  |             elif len(word[pos:]) == 3 and word[pos:] == 'EAU': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4830 |  |  |                 variants.append(('EAU', 'O')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4831 |  |  |                 pos += 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4832 |  |  |             elif len(word[pos:]) == 1 and word[pos:] in {'A', 'O'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4833 |  |  |                 if word[pos:] == 'O': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4834 |  |  |                     variants.append(('O', 'OW')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4835 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4836 |  |  |                     variants.append(('A', 'AR')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4837 |  |  |                 pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4838 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4839 |  |  |                 variants.append((word[pos],)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4840 |  |  |                 pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4841 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4842 |  |  |         variants = [''.join(letters) for letters in product(*variants)] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4843 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _haase_code(word):
                    """Return the Haase digit string for one spelling variant.

                    Each letter of ``word`` is compared against upper-case
                    letters and mapped to a digit ('X' may yield two digits).
                    Letters that match no rule contribute nothing. Runs of
                    identical digits in the assembled string are then
                    collapsed by ``_delete_consecutive_repeats``.

                    ``_vowels``, ``_before`` and ``_after`` are defined
                    outside this function. NOTE(review): ``_before`` /
                    ``_after`` presumably test the following / preceding
                    letter -- confirm against their definitions.

                    :param str word: one variant spelling to encode
                    :returns: the digit string for ``word``
                    :rtype: str
                    """
                    # Letters whose digit never depends on their neighbours.
                    fixed_digit = {
                        'B': '1',
                        'F': '3', 'V': '3', 'W': '3',
                        'G': '4', 'K': '4', 'Q': '4',
                        'L': '5',
                        'M': '6', 'N': '6',
                        'R': '7',
                        'S': '8', 'Z': '8',
                    }
                    # Letter sets passed to _before() when coding 'C'; the
                    # set used at index 0 additionally contains 'L' and 'R'.
                    c_medial_set = {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}
                    c_initial_set = c_medial_set | {'L', 'R'}

                    pieces = []
                    for idx, letter in enumerate(word):
                        # The vowel test has priority over all other rules.
                        if letter in _vowels:
                            digit = '9'
                        elif letter in fixed_digit:
                            digit = fixed_digit[letter]
                        elif letter == 'P':
                            ph = _before(word, idx, {'H'})
                            digit = '3' if ph else '1'
                        elif letter in {'D', 'T'}:
                            sib = _before(word, idx, {'C', 'S', 'Z'})
                            digit = '8' if sib else '2'
                        elif letter == 'C':
                            if _after(word, idx, {'S', 'Z'}):
                                digit = '8'
                            else:
                                if idx == 0:
                                    c_set = c_initial_set
                                else:
                                    c_set = c_medial_set
                                if _before(word, idx, c_set):
                                    digit = '4'
                                else:
                                    digit = '8'
                        elif letter == 'X':
                            # 'X' is the only letter that can emit 2 digits.
                            if _after(word, idx, {'C', 'K', 'Q'}):
                                digit = '8'
                            else:
                                digit = '48'
                        else:
                            # No rule for this letter: emit nothing.
                            continue
                        pieces.append(digit)

                    # Every '9' is kept; only adjacent duplicates are removed.
                    return _delete_consecutive_repeats(''.join(pieces))
            
                                                                                                            
                            
            
                                    
            
            
                | 4898 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4899 |  |  |     return tuple(_haase_code(word) for word in variants) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4900 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4901 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4902 |  |  | def reth_schek_phonetik(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4903 |  |  |     """Return Reth-Schek Phonetik code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4904 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4905 |  |  |     This algorithm is proposed in :cite:`Reth:1977`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4906 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4907 |  |  |     Since I couldn't secure a copy of that document (maybe I'll look for it | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4908 |  |  |     next time I'm in Germany), this implementation is based on what I could | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4909 |  |  |     glean from the implementations published by German Record Linkage | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4910 |  |  |     Center (www.record-linkage.de): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4911 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4912 |  |  |     - Privacy-preserving Record Linkage (PPRL) (in R) :cite:`Rukasz:2018` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4913 |  |  |     - Merge ToolBox (in Java) :cite:`Schnell:2004` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4914 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4915 |  |  |     Rules that are unclear: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4916 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4917 |  |  |     - Should 'C' become 'G' or 'Z'? (PPRL has both, 'Z' rule blocked) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4918 |  |  |     - Should 'CC' become 'G'? (PPRL has blocked 'CK' that may be typo) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4919 |  |  |     - Should 'TUI' -> 'ZUI' rule exist? (PPRL has rule, but I can't | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4920 |  |  |       think of a German word with '-tui-' in it.) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4921 |  |  |     - Should we really change 'SCH' -> 'CH' and then 'CH' -> 'SCH'? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4922 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4923 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4924 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4925 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4926 |  |  |     replacements = {3: {'AEH': 'E', 'IEH': 'I', 'OEH': 'OE', 'UEH': 'UE', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4927 |  |  |                         'SCH': 'CH', 'ZIO': 'TIO', 'TIU': 'TIO', 'ZIU': 'TIO', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4928 |  |  |                         'CHS': 'X', 'CKS': 'X', 'AEU': 'OI'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4929 |  |  |                     2: {'LL': 'L', 'AA': 'A', 'AH': 'A', 'BB': 'B', 'PP': 'B', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4930 |  |  |                         'BP': 'B', 'PB': 'B', 'DD': 'D', 'DT': 'D', 'TT': 'D', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4931 |  |  |                         'TH': 'D', 'EE': 'E', 'EH': 'E', 'AE': 'E', 'FF': 'F', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4932 |  |  |                         'PH': 'F', 'KK': 'K', 'GG': 'G', 'GK': 'G', 'KG': 'G', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4933 |  |  |                         'CK': 'G', 'CC': 'C', 'IE': 'I', 'IH': 'I', 'MM': 'M', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4934 |  |  |                         'NN': 'N', 'OO': 'O', 'OH': 'O', 'SZ': 'S', 'UH': 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4935 |  |  |                         'GS': 'X', 'KS': 'X', 'TZ': 'Z', 'AY': 'AI', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4936 |  |  |                         'EI': 'AI', 'EY': 'AI', 'EU': 'OI', 'RR': 'R', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4937 |  |  |                         'SS': 'S', 'KW': 'QU'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4938 |  |  |                     1: {'P': 'B', 'T': 'D', 'V': 'F', 'W': 'F', 'C': 'G', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4939 |  |  |                         'K': 'G', 'Y': 'I'}} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4940 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4941 |  |  |     # Uppercase | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4942 |  |  |     word = word.upper() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4943 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4944 |  |  |     # Replace umlauts/eszett | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4945 |  |  |     word = word.replace('Ä', 'AE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4946 |  |  |     word = word.replace('Ö', 'OE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4947 |  |  |     word = word.replace('Ü', 'UE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4948 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4949 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4950 |  |  |     # Main loop, using above replacements table | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4951 |  |  |     pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4952 |  |  |     while pos < len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4953 |  |  |         for num in range(3, 0, -1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4954 |  |  |             if word[pos:pos+num] in replacements[num]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4955 |  |  |                 word = (word[:pos] + replacements[num][word[pos:pos+num]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4956 |  |  |                         + word[pos+num:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4957 |  |  |                 pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4958 |  |  |                 break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4959 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4960 |  |  |             pos += 1  # Advance if nothing is recognized | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4961 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4962 |  |  |     # Change 'CH' back(?) to 'SCH' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4963 |  |  |     word = word.replace('CH', 'SCH') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4964 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4965 |  |  |     # Replace final sequences | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4966 |  |  |     if word[-2:] == 'ER': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4967 |  |  |         word = word[:-2]+'R' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4968 |  |  |     elif word[-2:] == 'EL': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4969 |  |  |         word = word[:-2]+'L' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4970 |  |  |     elif word[-1] == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4971 |  |  |         word = word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4972 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4973 |  |  |     return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4974 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4975 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4976 |  |  | def fonem(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4977 |  |  |     """Return the FONEM code of a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4978 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4979 |  |  |     FONEM is a phonetic algorithm designed for French (particularly surnames in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4980 |  |  |     Saguenay, Canada), defined in :cite:`Bouchard:1981`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4981 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4982 |  |  |     Guillaume Plique's JavaScript implementation :cite:`Plique:2018` at | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4983 |  |  |     https://github.com/Yomguithereal/talisman/blob/master/src/phonetics/french/fonem.js | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4984 |  |  |     was also consulted for this implementation. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4985 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4986 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4987 |  |  |     :returns: the FONEM code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4988 |  |  |     :rtype: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4989 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4990 |  |  |     # I don't see a sane way of doing this without regexps :( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4991 |  |  |     rule_table = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4992 |  |  |         # Vowels & groups of vowels | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4993 |  |  |         'V-1':     (re_compile('E?AU'), 'O'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4994 |  |  |         'V-2,5':   (re_compile('(E?AU|O)L[TX]$'), 'O'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4995 |  |  |         'V-3,4':   (re_compile('E?AU[TX]$'), 'O'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4996 |  |  |         'V-6':     (re_compile('E?AUL?D$'), 'O'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4997 |  |  |         'V-7':     (re_compile(r'(?<!G)AY$'), 'E'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4998 |  |  |         'V-8':     (re_compile('EUX$'), 'EU'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4999 |  |  |         'V-9':     (re_compile('EY(?=$|[BCDFGHJKLMNPQRSTVWXZ])'), 'E'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5000 |  |  |         'V-10':    ('Y', 'I'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5001 |  |  |         'V-11':    (re_compile('(?<=[AEIOUY])I(?=[AEIOUY])'), 'Y'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5002 |  |  |         'V-12':    (re_compile('(?<=[AEIOUY])ILL'), 'Y'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5003 |  |  |         'V-13':    (re_compile('OU(?=[AEOU]|I(?!LL))'), 'W'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5004 |  |  |         'V-14':    (re_compile(r'([AEIOUY])(?=\1)'), ''), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5005 |  |  |         # Nasal vowels | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5006 |  |  |         'V-15':    (re_compile('[AE]M(?=[BCDFGHJKLMPQRSTVWXZ])(?!$)'), 'EN'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5007 |  |  |         'V-16':    (re_compile('OM(?=[BCDFGHJKLMPQRSTVWXZ])'), 'ON'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5008 |  |  |         'V-17':    (re_compile('AN(?=[BCDFGHJKLMNPQRSTVWXZ])'), 'EN'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5009 |  |  |         'V-18':    (re_compile('(AI[MN]|EIN)(?=[BCDFGHJKLMNPQRSTVWXZ]|$)'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5010 |  |  |                     'IN'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5011 |  |  |         'V-19':    (re_compile('B(O|U|OU)RNE?$'), 'BURN'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5012 |  |  |         'V-20':    (re_compile('(^IM|(?<=[BCDFGHJKLMNPQRSTVWXZ])' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5013 |  |  |                                'IM(?=[BCDFGHJKLMPQRSTVWXZ]))'), 'IN'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5014 |  |  |         # Consonants and groups of consonants | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5015 |  |  |         'C-1':     ('BV', 'V'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5016 |  |  |         'C-2':     (re_compile('(?<=[AEIOUY])C(?=[EIY])'), 'SS'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5017 |  |  |         'C-3':     (re_compile('(?<=[BDFGHJKLMNPQRSTVWZ])C(?=[EIY])'), 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5018 |  |  |         'C-4':     (re_compile('^C(?=[EIY])'), 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5019 |  |  |         'C-5':     (re_compile('^C(?=[OUA])'), 'K'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5020 |  |  |         'C-6':     (re_compile('(?<=[AEIOUY])C$'), 'K'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5021 |  |  |         'C-7':     (re_compile('C(?=[BDFGJKLMNPQRSTVWXZ])'), 'K'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5022 |  |  |         'C-8':     (re_compile('CC(?=[AOU])'), 'K'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5023 |  |  |         'C-9':     (re_compile('CC(?=[EIY])'), 'X'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5024 |  |  |         'C-10':    (re_compile('G(?=[EIY])'), 'J'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5025 |  |  |         'C-11':    (re_compile('GA(?=I?[MN])'), 'G#'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5026 |  |  |         'C-12':    (re_compile('GE(O|AU)'), 'JO'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5027 |  |  |         'C-13':    (re_compile('GNI(?=[AEIOUY])'), 'GN'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5028 |  |  |         'C-14':    (re_compile('(?<![PCS])H'), ''), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5029 |  |  |         'C-15':    ('JEA', 'JA'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5030 |  |  |         'C-16':    (re_compile('^MAC(?=[BCDFGHJKLMNPQRSTVWXZ])'), 'MA#'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5031 |  |  |         'C-17':    (re_compile('^MC'), 'MA#'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5032 |  |  |         'C-18':    ('PH', 'F'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5033 |  |  |         'C-19':    ('QU', 'K'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5034 |  |  |         'C-20':    (re_compile('^SC(?=[EIY])'), 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5035 |  |  |         'C-21':    (re_compile('(?<=.)SC(?=[EIY])'), 'SS'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5036 |  |  |         'C-22':    (re_compile('(?<=.)SC(?=[AOU])'), 'SK'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5037 |  |  |         'C-23':    ('SH', 'CH'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5038 |  |  |         'C-24':    (re_compile('TIA$'), 'SSIA'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5039 |  |  |         'C-25':    (re_compile('(?<=[AIOUY])W'), ''), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5040 |  |  |         'C-26':    (re_compile('X[CSZ]'), 'X'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5041 |  |  |         'C-27':    (re_compile('(?<=[AEIOUY])Z|(?<=[BCDFGHJKLMNPQRSTVWXZ])' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5042 |  |  |                                'Z(?=[BCDFGHJKLMNPQRSTVWXZ])'), 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5043 |  |  |         'C-28':    (re_compile(r'([BDFGHJKMNPQRTVWXZ])\1'), r'\1'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5044 |  |  |         'C-28a':   (re_compile('CC(?=[BCDFGHJKLMNPQRSTVWXZ]|$)'), 'C'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5045 |  |  |         'C-28b':   (re_compile('((?<=[BCDFGHJKLMNPQRSTVWXZ])|^)SS'), 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5046 |  |  |         'C-28bb':  (re_compile('SS(?=[BCDFGHJKLMNPQRSTVWXZ]|$)'), 'S'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5047 |  |  |         'C-28c':   (re_compile('((?<=[^I])|^)LL'), 'L'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5048 |  |  |         'C-28d':   (re_compile('ILE$'), 'ILLE'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5049 |  |  |         'C-29':    (re_compile('(ILS|[CS]H|[MN]P|R[CFKLNSX])$|([BCDFGHJKL' + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5050 |  |  |                                'MNPQRSTVWXZ])[BCDFGHJKLMNPQRSTVWXZ]$'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5051 |  |  |                     lambda m: (m.group(1) or '') + (m.group(2) or '')), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5052 |  |  |         'C-30,32': (re_compile('^(SA?INT?|SEI[NM]|CINQ?|ST)(?!E)-?'), 'ST-'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5053 |  |  |         'C-31,33': (re_compile('^(SAINTE|STE)-?'), 'STE-'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5054 |  |  |         # Rules to undo rule bleeding prevention in C-11, C-16, C-17 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5055 |  |  |         'C-34':    ('G#', 'GA'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5056 |  |  |         'C-35':    ('MA#', 'MAC') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5057 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5058 |  |  |     rule_order = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5059 |  |  |         'V-14', 'C-28', 'C-28a', 'C-28b', 'C-28bb', 'C-28c', 'C-28d', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5060 |  |  |         'C-12', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5061 |  |  |         'C-8', 'C-9', 'C-10', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5062 |  |  |         'C-16', 'C-17', 'C-2', 'C-3', 'C-7', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5063 |  |  |         'V-2,5', 'V-3,4', 'V-6', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5064 |  |  |         'V-1', 'C-14', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5065 |  |  |         'C-31,33', 'C-30,32', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5066 |  |  |         'C-11', 'V-15', 'V-17', 'V-18', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5067 |  |  |         'V-7', 'V-8', 'V-9', 'V-10', 'V-11', 'V-12', 'V-13', 'V-16', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5068 |  |  |         'V-19', 'V-20', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5069 |  |  |         'C-1', 'C-4', 'C-5', 'C-6', 'C-13', 'C-15', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5070 |  |  |         'C-18', 'C-19', 'C-20', 'C-21', 'C-22', 'C-23', 'C-24', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5071 |  |  |         'C-25', 'C-26', 'C-27', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5072 |  |  |         'C-29', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5073 |  |  |         'V-14', 'C-28', 'C-28a', 'C-28b', 'C-28bb', 'C-28c', 'C-28d', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5074 |  |  |         'C-34', 'C-35' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5075 |  |  |     ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5076 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5077 |  |  |     # normalize, upper-case, and filter non-French letters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5078 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5079 |  |  |     word = word.translate({198: 'AE', 338: 'OE'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5080 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5081 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5082 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5083 |  |  |                     'Y', 'Z', '-'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5084 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5085 |  |  |     for rule in rule_order: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5086 |  |  |         regex, repl = rule_table[rule] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5087 |  |  |         if isinstance(regex, text_type): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5088 |  |  |             word = word.replace(regex, repl) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5089 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5090 |  |  |             word = regex.sub(repl, word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5091 |  |  |         # print(rule, word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5092 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5093 |  |  |     return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5094 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5095 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5096 |  |  | def parmar_kumbharana(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5097 |  |  |     """Return the Parmar-Kumbharana encoding of a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5098 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5099 |  |  |     This is based on the phonetic algorithm proposed in :cite:`Parmar:2014`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5101 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5102 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5103 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5104 |  |  |     rule_table = {4: {'OUGH': 'F'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5105 |  |  |                   3: {'DGE': 'J', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5106 |  |  |                       'OUL': 'U', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5107 |  |  |                       'GHT': 'T'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5108 |  |  |                   2: {'CE': 'S', 'CI': 'S', 'CY': 'S', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5109 |  |  |                       'GE': 'J', 'GI': 'J', 'GY': 'J', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5110 |  |  |                       'WR': 'R', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5111 |  |  |                       'GN': 'N', 'KN': 'N', 'PN': 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5112 |  |  |                       'CK': 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5113 |  |  |                       'SH': 'S'}} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5114 |  |  |     vowel_trans = {65: '', 69: '', 73: '', 79: '', 85: '', 89: ''} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5115 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5116 |  |  |     word = word.upper()  # Rule 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5117 |  |  |     word = _delete_consecutive_repeats(word)  # Rule 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5118 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5119 |  |  |     # Rule 5 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5120 |  |  |     i = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5121 |  |  |     while i < len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5122 |  |  |         for match_len in range(4, 1, -1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5123 |  |  |             if word[i:i+match_len] in rule_table[match_len]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5124 |  |  |                 repl = rule_table[match_len][word[i:i+match_len]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5125 |  |  |                 word = (word[:i] + repl + word[i+match_len:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5126 |  |  |                 i += len(repl) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5127 |  |  |                 break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5128 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5129 |  |  |             i += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5130 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5131 |  |  |     word = word[0]+word[1:].translate(vowel_trans)  # Rule 6 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5132 |  |  |     return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5133 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5135 |  |  | def davidson(lname, fname='.', omit_fname=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5136 |  |  |     """Return Davidson's Consonant Code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5137 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5138 |  |  |     This is based on the name compression system described in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5139 |  |  |     :cite:`Davidson:1962`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5140 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5141 |  |  |     :cite:`Dolby:1970` identifies this as having been the name compression | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5142 |  |  |     algorithm used by SABRE. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5143 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5144 |  |  |     :param str lname: Last name (or word) to be encoded | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5145 |  |  |     :param str fname: First name (optional), of which the first character is | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5146 |  |  |         included in the code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5147 |  |  |     :param str omit_fname: Set to True to completely omit the first character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5148 |  |  |         of the first name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5149 |  |  |     :return: Davidson's Consonant Code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5150 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5151 |  |  |     trans = {65: '', 69: '', 73: '', 79: '', 85: '', 72: '', 87: '', 89: ''} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5152 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5153 |  |  |     lname = text_type(lname.upper()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5154 |  |  |     code = _delete_consecutive_repeats(lname[:1] + lname[1:].translate(trans)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5155 |  |  |     code = code[:4] + (4-len(code))*' ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5156 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5157 |  |  |     if not omit_fname: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5158 |  |  |         code += fname[:1].upper() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5159 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5160 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5161 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5162 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def sound_d(word, maxlength=4):
                    """Encode a word as its SoundD code.

                    SoundD is defined in :cite:`Varol:2012`.

                    :param str word: the word to transform
                    :param int maxlength: the length of the code returned (defaults to 4);
                        pass None to get the code without padding or truncation
                    :return: the SoundD code of the word
                    """
                    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                    digits = '01230120022455012623010202'
                    # Code point of each capital letter -> its SoundD digit; letters that
                    # map to '0' are removed from the code further down.
                    digit_for = {ord(letter): digit
                                 for letter, digit in zip(letters, digits)}
                    allowed = frozenset(letters)

                    # Uppercase and decompose the input, then keep only plain A-Z.
                    word = normalize('NFKD', text_type(word.upper())).replace('ß', 'SS')
                    word = ''.join(char for char in word if char in allowed)

                    # Word-initial adjustments; at most one of them is applied.
                    lead = word[:2]
                    if lead in {'KN', 'GN', 'PN', 'AC', 'WR'}:
                        word = word[1:]
                    elif word.startswith('X'):
                        word = 'S' + word[1:]
                    elif lead == 'WH':
                        word = 'W' + word[2:]

                    # Letter clusters are swapped for digits directly; the digits are not
                    # keys of the translation table, so translate() leaves them alone.
                    for cluster, stand_in in (('DGE', '20'), ('DGI', '20'), ('GH', '0')):
                        word = word.replace(cluster, stand_in)

                    word = word.translate(digit_for)
                    word = _delete_consecutive_repeats(word)
                    word = word.replace('0', '')

                    if maxlength is None:
                        return word

                    # Right-pad with zeros when too short, otherwise cut to length.
                    shortfall = maxlength - len(word)
                    if shortfall > 0:
                        return word + '0' * shortfall
                    return word[:maxlength]
            
                                                                                                            
                            
            
                                    
            
            
                | 5203 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5204 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5205 |  |  | def pshp_soundex_last(lname, maxlength=4, german=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5206 |  |  |     """Calculate the PSHP Soundex/Viewex Coding of a last name. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5207 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5208 |  |  |     This coding is based on :cite:`Hershberg:1976`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5209 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5210 |  |  |     Reference was also made to the German version of the same: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5211 |  |  |     :cite:`Hershberg:1979`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5212 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5213 |  |  |     A separate function, pshp_soundex_first() is used for first names. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5214 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5215 |  |  |     :param lname: the last name to encode | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5216 |  |  |     :param german: set to True if the name is German (different rules apply) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5217 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5218 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5219 |  |  |     lname = normalize('NFKD', text_type(lname.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5220 |  |  |     lname = lname.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5221 |  |  |     lname = ''.join(c for c in lname if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5222 |  |  |                     {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5223 |  |  |                      'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5224 |  |  |                      'W', 'X', 'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5225 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5226 |  |  |     # A. Prefix treatment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5227 |  |  |     if lname[:3] == 'VON' or lname[:3] == 'VAN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5228 |  |  |         lname = lname[3:].strip() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5229 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5230 |  |  |     # The rule implemented below says "MC, MAC become 1". I believe it meant to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5231 |  |  |     # say they become M except in German data (where superscripted 1 indicates | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5232 |  |  |     # "except in German data"). It doesn't make sense for them to become 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5233 |  |  |     # (BPFV -> 1) or to apply outside German. Unfortunately, both articles have | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5234 |  |  |     # this error(?). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5235 |  |  |     if not german: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5236 |  |  |         if lname[:3] == 'MAC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5237 |  |  |             lname = 'M'+lname[3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5238 |  |  |         elif lname[:2] == 'MC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5239 |  |  |             lname = 'M'+lname[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5240 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5241 |  |  |     # The non-German-only rule to strip ' is unnecessary due to filtering | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5242 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5243 |  |  |     if lname[:1] in {'E', 'I', 'O', 'U'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5244 |  |  |         lname = 'A' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5245 |  |  |     elif lname[:2] in {'GE', 'GI', 'GY'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5246 |  |  |         lname = 'J' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5247 |  |  |     elif lname[:2] in {'CE', 'CI', 'CY'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5248 |  |  |         lname = 'S' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5249 |  |  |     elif lname[:3] == 'CHR': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5250 |  |  |         lname = 'K' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5251 |  |  |     elif lname[:1] == 'C' and lname[:2] != 'CH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5252 |  |  |         lname = 'K' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5253 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5254 |  |  |     if lname[:2] == 'KN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5255 |  |  |         lname = 'N' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5256 |  |  |     elif lname[:2] == 'PH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5257 |  |  |         lname = 'F' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5258 |  |  |     elif lname[:3] in {'WIE', 'WEI'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5259 |  |  |         lname = 'V' + lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5260 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5261 |  |  |     if german and lname[:1] in {'W', 'M', 'Y', 'Z'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5262 |  |  |         lname = {'W': 'V', 'M': 'N', 'Y': 'J', 'Z': 'S'}[lname[0]]+lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5263 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5264 |  |  |     code = lname[:1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5265 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5266 |  |  |     # B. Postfix treatment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5267 |  |  |     if lname[-1:] == 'R': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5268 |  |  |         lname = lname[:-1] + 'N' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5269 |  |  |     elif lname[-2:] in {'SE', 'CE'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5270 |  |  |         lname = lname[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5271 |  |  |     if lname[-2:] == 'SS': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5272 |  |  |         lname = lname[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5273 |  |  |     elif lname[-1:] == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5274 |  |  |         lname = lname[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5275 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5276 |  |  |     if not german: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5277 |  |  |         l5_repl = {'STOWN': 'SAWON', 'MPSON': 'MASON'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5278 |  |  |         l4_repl = {'NSEN': 'ASEN', 'MSON': 'ASON', 'STEN': 'SAEN', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5279 |  |  |                    'STON': 'SAON'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5280 |  |  |         if lname[-5:] in l5_repl: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5281 |  |  |             lname = lname[:-5] + l5_repl[lname[-5:]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5282 |  |  |         elif lname[-4:] in l4_repl: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5283 |  |  |             lname = lname[:-4] + l4_repl[lname[-4:]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5284 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5285 |  |  |     if lname[-2:] in {'NG', 'ND'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5286 |  |  |         lname = lname[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5287 |  |  |     if not german and lname[-3:] in {'GAN', 'GEN'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5288 |  |  |         lname = lname[:-3]+'A'+lname[-2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5289 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5290 |  |  |     if german: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5291 |  |  |         if lname[-3:] == 'TES': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5292 |  |  |             lname = lname[:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5293 |  |  |         elif lname[-2:] == 'TS': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5294 |  |  |             lname = lname[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5295 |  |  |         if lname[-3:] == 'TZE': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5296 |  |  |             lname = lname[:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5297 |  |  |         elif lname[-2:] == 'ZE': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5298 |  |  |             lname = lname[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5299 |  |  |         if lname[-1:] == 'Z': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5300 |  |  |             lname = lname[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5301 |  |  |         elif lname[-2:] == 'TE': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5302 |  |  |             lname = lname[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5303 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5304 |  |  |     # C. Infix Treatment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5305 |  |  |     lname = lname.replace('CK', 'C') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5306 |  |  |     lname = lname.replace('SCH', 'S') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5307 |  |  |     lname = lname.replace('DT', 'T') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5308 |  |  |     lname = lname.replace('ND', 'N') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5309 |  |  |     lname = lname.replace('NG', 'N') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5310 |  |  |     lname = lname.replace('LM', 'M') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5311 |  |  |     lname = lname.replace('MN', 'M') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5312 |  |  |     lname = lname.replace('WIE', 'VIE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5313 |  |  |     lname = lname.replace('WEI', 'VEI') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5314 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5315 |  |  |     # D. Soundexing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5316 |  |  |     # code for X & Y are unspecified, but presumably are 2 & 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5317 |  |  |     _pshp_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5318 |  |  |                                   'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5319 |  |  |                                  '01230120022455012523010202')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5320 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5321 |  |  |     lname = lname.translate(_pshp_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5322 |  |  |     lname = _delete_consecutive_repeats(lname) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5323 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5324 |  |  |     code += lname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5325 |  |  |     code = code.replace('0', '')  # rule 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5326 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5327 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5328 |  |  |         if len(code) < maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5329 |  |  |             code += '0' * (maxlength-len(code)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5330 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5331 |  |  |             code = code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5332 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5333 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5334 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5335 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5336 |  |  | def pshp_soundex_first(fname, maxlength=4, german=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5337 |  |  |     """Calculate the PSHP Soundex/Viewex Coding of a first name. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5338 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5339 |  |  |     This coding is based on :cite:`Hershberg:1976`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5340 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5341 |  |  |     Reference was also made to the German version of the same: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5342 |  |  |     :cite:`Hershberg:1979`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5343 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5344 |  |  |     A separate function, pshp_soundex_last() is used for last names. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5345 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5346 |  |  |     :param fname: the first name to encode | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5347 |  |  |     :param german: set to True if the name is German (different rules apply) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5348 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5349 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5350 |  |  |     fname = normalize('NFKD', text_type(fname.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5351 |  |  |     fname = fname.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5352 |  |  |     fname = ''.join(c for c in fname if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5353 |  |  |                     {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5354 |  |  |                      'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5355 |  |  |                      'W', 'X', 'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5356 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5357 |  |  |     # special rules | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5358 |  |  |     if fname == 'JAMES': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5359 |  |  |         code = 'J7' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5360 |  |  |     elif fname == 'PAT': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5361 |  |  |         code = 'P7' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5362 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5363 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5364 |  |  |         # A. Prefix treatment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5365 |  |  |         if fname[:2] in {'GE', 'GI', 'GY'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5366 |  |  |             fname = 'J' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5367 |  |  |         elif fname[:2] in {'CE', 'CI', 'CY'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5368 |  |  |             fname = 'S' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5369 |  |  |         elif fname[:3] == 'CHR': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5370 |  |  |             fname = 'K' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5371 |  |  |         elif fname[:1] == 'C' and fname[:2] != 'CH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5372 |  |  |             fname = 'K' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5373 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5374 |  |  |         if fname[:2] == 'KN': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5375 |  |  |             fname = 'N' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5376 |  |  |         elif fname[:2] == 'PH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5377 |  |  |             fname = 'F' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5378 |  |  |         elif fname[:3] in {'WIE', 'WEI'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5379 |  |  |             fname = 'V' + fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5380 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5381 |  |  |         if german and fname[:1] in {'W', 'M', 'Y', 'Z'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5382 |  |  |             fname = ({'W': 'V', 'M': 'N', 'Y': 'J', 'Z': 'S'}[fname[0]] + | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5383 |  |  |                      fname[1:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5384 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5385 |  |  |         code = fname[:1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5386 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5387 |  |  |         # B. Soundex coding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5388 |  |  |         # code for Y unspecified, but presumably is 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5389 |  |  |         _pshp_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5390 |  |  |                                       'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5391 |  |  |                                      '01230120022455012523010202')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5392 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5393 |  |  |         fname = fname.translate(_pshp_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5394 |  |  |         fname = _delete_consecutive_repeats(fname) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5395 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5396 |  |  |         code += fname[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5397 |  |  |         syl_ptr = code.find('0') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5398 |  |  |         syl2_ptr = code[syl_ptr + 1:].find('0') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5399 |  |  |         if syl_ptr != -1 and syl2_ptr != -1 and syl2_ptr - syl_ptr > -1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5400 |  |  |             code = code[:syl_ptr + 2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5401 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5402 |  |  |         code = code.replace('0', '')  # rule 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5403 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5404 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5405 |  |  |         if len(code) < maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5406 |  |  |             code += '0' * (maxlength-len(code)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5407 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5408 |  |  |             code = code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5409 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5410 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5411 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5412 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5413 |  |  | def henry_early(word, maxlength=3): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5414 |  |  |     """Calculate the early version of the Henry code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5415 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5416 |  |  |     The early version of Henry coding is given in :cite:`Legare:1972`. This is | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5417 |  |  |     different from the later version defined in :cite:`Henry:1976`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5418 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5419 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5420 |  |  |     :param int maxlength: the length of the code returned (defaults to 3) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5421 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5422 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5423 |  |  |     _cons = {'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5424 |  |  |              'R', 'S', 'T', 'V', 'W', 'X', 'Z'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5425 |  |  |     _vows = {'A', 'E', 'I', 'O', 'U', 'Y'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5426 |  |  |     _diph = {'AI': 'E', 'AY': 'E', 'EI': 'E', 'AU': 'O', 'OI': 'O', 'OU': 'O', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5427 |  |  |              'EU': 'U'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5428 |  |  |     _unaltered = {'B', 'D', 'F', 'J', 'K', 'L', 'M', 'N', 'R', 'T', 'V'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5429 |  |  |     _simple = {'W': 'V', 'X': 'S', 'V': 'S'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5430 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5431 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5432 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5433 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5434 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5435 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5436 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5437 |  |  |     if not word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5438 |  |  |         return '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5439 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5440 |  |  |     # Rule Ia seems to be covered entirely in II | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5441 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5442 |  |  |     # Rule Ib | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5443 |  |  |     if word[0] in _vows: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5444 |  |  |         # Ib1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5445 |  |  |         if (((word[1:2] in _cons-{'M', 'N'} and word[2:3] in _cons) or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5446 |  |  |              (word[1:2] in _cons and word[2:3] not in _cons))): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5447 |  |  |             if word[0] == 'Y': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5448 |  |  |                 word = 'I'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5449 |  |  |         # Ib2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5450 |  |  |         elif word[1:2] in {'M', 'N'} and word[2:3] in _cons: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5451 |  |  |             if word[0] == 'E': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5452 |  |  |                 word = 'A'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5453 |  |  |             elif word[0] in {'I', 'U', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5454 |  |  |                 word = 'E'+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5455 |  |  |         # Ib3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5456 |  |  |         elif word[:2] in _diph: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5457 |  |  |             word = _diph[word[:2]]+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5458 |  |  |         # Ib4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5459 |  |  |         elif word[1:2] in _vows and word[0] == 'Y': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5460 |  |  |             word = 'I' + word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5461 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5462 |  |  |     code = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5463 |  |  |     skip = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5464 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5465 |  |  |     # Rule II | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5466 |  |  |     for pos, char in enumerate(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5467 |  |  |         nxch = char[pos+1:pos+2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5468 |  |  |         prev = char[pos-1:pos] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5469 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5470 |  |  |         if skip: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5471 |  |  |             skip -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5472 |  |  |         elif char in _vows: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5473 |  |  |             code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5474 |  |  |         # IIc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5475 |  |  |         elif char == nxch: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5476 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5477 |  |  |             code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5478 |  |  |         elif word[pos:pos+2] in {'CQ', 'DT', 'SC'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5479 |  |  |             skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5480 |  |  |             code += word[pos+1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5481 |  |  |         # IId | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5482 |  |  |         elif char == 'H' and prev in _cons: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5483 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5484 |  |  |         elif char == 'S' and nxch in _cons: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5485 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5486 |  |  |         elif char in _cons-{'L', 'R'} and nxch in _cons-{'L', 'R'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5487 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5488 |  |  |         elif char == 'L' and nxch in {'M', 'N'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5489 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5490 |  |  |         elif char in {'M', 'N'} and prev in _vows and nxch in _cons: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5491 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5492 |  |  |         # IIa | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5493 |  |  |         elif char in _unaltered: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5494 |  |  |             code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5495 |  |  |         # IIb | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5496 |  |  |         elif char in _simple: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5497 |  |  |             code += _simple[char] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5498 |  |  |         elif char in {'C', 'G', 'P', 'Q', 'S'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5499 |  |  |             if char == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5500 |  |  |                 if nxch in {'A', 'O', 'U', 'L', 'R'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5501 |  |  |                     code += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5502 |  |  |                 elif nxch in {'E', 'I', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5503 |  |  |                     code += 'J' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5504 |  |  |                 elif nxch == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5505 |  |  |                     if word[pos+2:pos+3] in _vows: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5506 |  |  |                         code += 'C' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5507 |  |  |                     elif word[pos+2:pos+3] in {'R', 'L'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5508 |  |  |                         code += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5509 |  |  |             elif char == 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5510 |  |  |                 if nxch in {'A', 'O', 'U', 'L', 'R'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5511 |  |  |                     code += 'G' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5512 |  |  |                 elif nxch in {'E', 'I', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5513 |  |  |                     code += 'J' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5514 |  |  |                 elif nxch == 'N': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5515 |  |  |                     code += 'N' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5516 |  |  |             elif char == 'P': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5517 |  |  |                 if nxch != 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5518 |  |  |                     code += 'P' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5519 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5520 |  |  |                     code += 'F' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5521 |  |  |             elif char == 'Q': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5522 |  |  |                 if word[pos+1:pos+2] in {'UE', 'UI', 'UY'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5523 |  |  |                     char += 'G' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5524 |  |  |                 elif word[pos + 1:pos + 2] in {'UA', 'UO'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5525 |  |  |                     char += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5526 |  |  |             elif char == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5527 |  |  |                 if word[pos:pos+6] == 'SAINTE': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5528 |  |  |                     code += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5529 |  |  |                     skip = 5 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5530 |  |  |                 elif word[pos:pos+5] == 'SAINT': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5531 |  |  |                     code += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5532 |  |  |                     skip = 4 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5533 |  |  |                 elif word[pos:pos+3] == 'STE': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5534 |  |  |                     code += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5535 |  |  |                     skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5536 |  |  |                 elif word[pos:pos+2] == 'ST': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5537 |  |  |                     code += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5538 |  |  |                     skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5539 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5540 |  |  |                     code += 'S' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5541 |  |  |         else:  # this should not be possible | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5542 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5543 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5544 |  |  |     # IIe1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5545 |  |  |     if code[-4:] in {'AULT', 'EULT', 'OULT'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5546 |  |  |         code = code[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5547 |  |  |     elif code[-4:-3] in _vows and code[-3:] == 'MPS': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5548 |  |  |         code = code[:-3] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5549 |  |  |     elif code[-3:-2] in _vows and code[-2:] in {'MB', 'MP', 'ND', 'NS', 'NT'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5550 |  |  |         code = code[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5551 |  |  |     elif code[-2:-1] == 'R' and code[-1:] in _cons: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5552 |  |  |         code = code[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5553 |  |  |     # IIe2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5554 |  |  |     elif code[-2:-1] in _vows and code[-1:] in {'D', 'M', 'N', 'S', 'T'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5555 |  |  |         code = code[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5556 |  |  |     elif code[-2:] == 'ER': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5557 |  |  |         code = code[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5558 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5559 |  |  |     # Drop non-initial vowels | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5560 |  |  |     code = code[:1]+code[1:].translate({65: '', 69: '', 73: '', 79: '', 85: '', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5561 |  |  |                                         89: ''}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5562 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5563 |  |  |     if maxlength is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5564 |  |  |             code = code[:maxlength] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5565 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5566 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5567 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5568 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5569 |  |  | def norphone(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5570 |  |  |     """Return the Norphone code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5571 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5572 |  |  |     The reference implementation by Lars Marius Garshol is available in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5573 |  |  |     :cite:`Garshol:2015`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5574 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5575 |  |  |     Norphone was designed for Norwegian, but this implementation has been | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5576 |  |  |     extended to support Swedish vowels as well. This function incorporates | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5577 |  |  |     the "not implemented" rules from the above file's rule set. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5578 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5579 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5580 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5581 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5582 |  |  |     _vowels = {'A', 'E', 'I', 'O', 'U', 'Y', 'Å', 'Æ', 'Ø', 'Ä', 'Ö'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5583 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5584 |  |  |     replacements = {4: {'SKEI': 'X'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5585 |  |  |                     3: {'SKJ': 'X', 'KEI': 'X'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5586 |  |  |                     2: {'CH': 'K', 'CK': 'K', 'GJ': 'J', 'GH': 'K', 'HG': 'K', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5587 |  |  |                         'HJ': 'J', 'HL': 'L', 'HR': 'R', 'KJ': 'X', 'KI': 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5588 |  |  |                         'LD': 'L', 'ND': 'N', 'PH': 'F', 'TH': 'T', 'SJ': 'X'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5589 |  |  |                     1: {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'}} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5590 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5591 |  |  |     word = word.upper() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5592 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5593 |  |  |     code = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5594 |  |  |     skip = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5595 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5596 |  |  |     if word[0:2] == 'AA': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5597 |  |  |         code = 'Å' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5598 |  |  |         skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5599 |  |  |     elif word[0:2] == 'GI': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5600 |  |  |         code = 'J' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5601 |  |  |         skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5602 |  |  |     elif word[0:3] == 'SKY': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5603 |  |  |         code = 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5604 |  |  |         skip = 3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5605 |  |  |     elif word[0:2] == 'EI': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5606 |  |  |         code = 'Æ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5607 |  |  |         skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5608 |  |  |     elif word[0:2] == 'KY': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5609 |  |  |         code = 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5610 |  |  |         skip = 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5611 |  |  |     elif word[:1] == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5612 |  |  |         code = 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5613 |  |  |         skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5614 |  |  |     elif word[:1] == 'Ä': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5615 |  |  |         code = 'Æ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5616 |  |  |         skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5617 |  |  |     elif word[:1] == 'Ö': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5618 |  |  |         code = 'Ø' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5619 |  |  |         skip = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5620 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5621 |  |  |     if word[-2:] == 'DT': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5622 |  |  |         word = word[:-2]+'T' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5623 |  |  |     # Though the rules indicate this rule applies in all positions, the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5624 |  |  |     # reference implementation indicates it applies only in final position. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5625 |  |  |     elif word[-2:-1] in _vowels and word[-1:] == 'D': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5626 |  |  |         word = word[:-2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5627 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5628 |  |  |     for pos, char in enumerate(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5629 |  |  |         if skip: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5630 |  |  |             skip -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5631 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5632 |  |  |             for length in sorted(replacements, reverse=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5633 |  |  |                 if word[pos:pos+length] in replacements[length]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5634 |  |  |                     code += replacements[length][word[pos:pos+length]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5635 |  |  |                     skip = length-1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5636 |  |  |                     break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5637 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5638 |  |  |                 if not pos or char not in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5639 |  |  |                     code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5640 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5641 |  |  |     code = _delete_consecutive_repeats(code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5642 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5643 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5644 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5645 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5646 |  |  | def dolby(word, maxlength=None, keep_vowels=False, vowel_char='*'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5647 |  |  |     r"""Return the Dolby Code of a name. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5648 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5649 |  |  |     This follows "A Spelling Equivalent Abbreviation Algorithm For Personal | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5650 |  |  |     Names" from :cite:`Dolby:1970` and :cite:`Cunningham:1969`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5651 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5652 |  |  |     :param word: the word to encode | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5653 |  |  |     :param maxlength: maximum length of the returned Dolby code -- this also | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5654 |  |  |         activates the fixed-length code mode | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5655 |  |  |     :param keep_vowels: if True, retains all vowel markers | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5656 |  |  |     :param vowel_char: the vowel marker character (default to \*) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5657 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5658 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5659 |  |  |     _vowels = {'A', 'E', 'I', 'O', 'U', 'Y'} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5660 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5661 |  |  |     # uppercase, normalize, decompose, and filter non-A-Z out | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5662 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5663 |  |  |     word = word.replace('ß', 'SS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5664 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5665 |  |  |                    {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5666 |  |  |                     'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5667 |  |  |                     'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5668 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5669 |  |  |     # Rule 1 (FL2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5670 |  |  |     if word[:3] in {'MCG', 'MAG', 'MAC'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5671 |  |  |         word = 'MK'+word[3:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5672 |  |  |     elif word[:2] == 'MC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5673 |  |  |         word = 'MK'+word[2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5674 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5675 |  |  |     # Rule 2 (FL3) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5676 |  |  |     pos = len(word)-2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5677 |  |  |     while pos > -1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5678 |  |  |         if word[pos:pos+2] in {'DT', 'LD', 'ND', 'NT', 'RC', 'RD', 'RT', 'SC', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5679 |  |  |                                'SK', 'ST'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5680 |  |  |             word = word[:pos+1]+word[pos+2:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5681 |  |  |             pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5682 |  |  |         pos -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5683 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5684 |  |  |     # Rule 3 (FL4) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5685 |  |  |     # Although the rule indicates "after the first letter", the test cases make | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5686 |  |  |     # it clear that these apply to the first letter also. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5687 |  |  |     word = word.replace('X', 'KS') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5688 |  |  |     word = word.replace('CE', 'SE') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5689 |  |  |     word = word.replace('CI', 'SI') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5690 |  |  |     word = word.replace('CY', 'SI') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5691 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5692 |  |  |     # not in the rule set, but they seem to have intended it | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5693 |  |  |     word = word.replace('TCH', 'CH') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5694 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5695 |  |  |     pos = word.find('CH', 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5696 |  |  |     while pos != -1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5697 |  |  |         if word[pos-1:pos] not in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5698 |  |  |             word = word[:pos]+'S'+word[pos+1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5699 |  |  |         pos = word.find('CH', pos+1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5700 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5701 |  |  |     word = word.replace('C', 'K') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5702 |  |  |     word = word.replace('Z', 'S') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5703 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5704 |  |  |     word = word.replace('WR', 'R') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5705 |  |  |     word = word.replace('DG', 'G') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5706 |  |  |     word = word.replace('QU', 'K') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5707 |  |  |     word = word.replace('T', 'D') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5708 |  |  |     word = word.replace('PH', 'F') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5709 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5710 |  |  |     # Rule 4 (FL5) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5711 |  |  |     # Although the rule indicates "after the first letter", the test cases make | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5712 |  |  |     # it clear that these apply to the first letter also. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5713 |  |  |     pos = word.find('K', 0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5714 |  |  |     while pos != -1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5715 |  |  |         if pos > 1 and word[pos-1:pos] not in _vowels | {'L', 'N', 'R'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5716 |  |  |             word = word[:pos-1]+word[pos:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5717 |  |  |             pos -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5718 |  |  |         pos = word.find('K', pos+1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5719 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5720 |  |  |     # Rule FL6 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5721 |  |  |     if maxlength and word[-1:] == 'E': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5722 |  |  |         word = word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5723 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5724 |  |  |     # Rule 5 (FL7) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5725 |  |  |     word = _delete_consecutive_repeats(word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5726 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5727 |  |  |     # Rule 6 (FL8) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5728 |  |  |     if word[:2] == 'PF': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5729 |  |  |         word = word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5730 |  |  |     if word[-2:] == 'PF': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5731 |  |  |         word = word[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5732 |  |  |     elif word[-2:] == 'GH': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5733 |  |  |         if word[-3:-2] in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5734 |  |  |             word = word[:-2]+'F' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5735 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5736 |  |  |             word = word[:-2]+'G' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5737 |  |  |     word = word.replace('GH', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5738 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5739 |  |  |     # Rule FL9 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5740 |  |  |     if maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5741 |  |  |         word = word.replace('V', 'F') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5742 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5743 |  |  |     # Rules 7-9 (FL10-FL12) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5744 |  |  |     first = 1 + (1 if maxlength else 0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5745 |  |  |     code = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5746 |  |  |     for pos, char in enumerate(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5747 |  |  |         if char in _vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5748 |  |  |             if first or keep_vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5749 |  |  |                 code += vowel_char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5750 |  |  |                 first -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5751 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5752 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5753 |  |  |         elif pos > 0 and char in {'W', 'H'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5754 |  |  |             continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5755 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5756 |  |  |             code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5757 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5758 |  |  |     if maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5759 |  |  |         # Rule FL13 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5760 |  |  |         if len(code) > maxlength and code[-1:] == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5761 |  |  |             code = code[:-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5762 |  |  |         if keep_vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5763 |  |  |             code = code[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5764 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5765 |  |  |             # Rule FL14 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5766 |  |  |             code = code[:maxlength + 2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5767 |  |  |             # Rule FL15 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5768 |  |  |             while len(code) > maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5769 |  |  |                 vowels = len(code) - maxlength | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5770 |  |  |                 excess = vowels - 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5771 |  |  |                 word = code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5772 |  |  |                 code = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5773 |  |  |                 for char in word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5774 |  |  |                     if char == vowel_char: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5775 |  |  |                         if vowels: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5776 |  |  |                             code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5777 |  |  |                             vowels -= 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5778 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5779 |  |  |                         code += char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5780 |  |  |                 code = code[:maxlength + excess] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5781 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5782 |  |  |         # Rule FL16 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5783 |  |  |         code += ' ' * (maxlength - len(code)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5784 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5785 |  |  |     return code | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5786 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5787 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5788 |  |  | def phonetic_spanish(word, maxlength=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5789 |  |  |     """Return the PhoneticSpanish coding of word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5790 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5791 |  |  |     This follows the coding described in :cite:`Amon:2012` and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5792 |  |  |     :cite:`delPilarAngeles:2015`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5793 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5794 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5795 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5796 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5797 |  |  |     _es_soundex_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5798 |  |  |                                         'BCDFGHJKLMNPQRSTVXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5799 |  |  |                                        '14328287566079431454')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5800 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5801 |  |  |     # uppercase, normalize, and decompose, filter to A-Z minus vowels & W | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5802 |  |  |     word = normalize('NFKD', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5803 |  |  |     word = ''.join(c for c in word if c in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5804 |  |  |                    {'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5805 |  |  |                     'P', 'Q', 'R', 'S', 'T', 'V', 'X', 'Y', 'Z'}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5806 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5807 |  |  |     # merge repeated Ls & Rs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5808 |  |  |     word = word.replace('LL', 'L') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5809 |  |  |     word = word.replace('R', 'R') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5810 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5811 |  |  |     # apply the Soundex algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5812 |  |  |     sdx = word.translate(_es_soundex_translation) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5813 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5814 |  |  |     if maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5815 |  |  |         sdx = sdx[:maxlength] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5816 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5817 |  |  |     return sdx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5818 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5819 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5820 |  |  | def spanish_metaphone(word, maxlength=6, modified=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5821 |  |  |     """Return the Spanish Metaphone of a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5822 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5823 |  |  |     This is a quick rewrite of the Spanish Metaphone Algorithm, as presented at | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5824 |  |  |     https://github.com/amsqr/Spanish-Metaphone and discussed in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5825 |  |  |     :cite:`Mosquera:2012`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5826 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5827 |  |  |     Modified version based on :cite:`delPilarAngeles:2016`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5828 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5829 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5830 |  |  |     :param maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5831 |  |  |     :param modified: Set to True to use del Pilar Angeles & Bailón-Miguel's | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5832 |  |  |         modified version of the algorithm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5833 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5834 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5835 |  |  |     def _is_vowel(pos): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5836 |  |  |         """Return True if the character at word[pos] is a vowel.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5837 |  |  |         if pos < len(word) and word[pos] in {'A', 'E', 'I', 'O', 'U'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5838 |  |  |             return True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5839 |  |  |         return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5840 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5841 |  |  |     word = normalize('NFC', text_type(word.upper())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5842 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5843 |  |  |     meta_key = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5844 |  |  |     pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5845 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5846 |  |  |     # do some replacements for the modified version | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5847 |  |  |     if modified: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5848 |  |  |         word = word.replace('MB', 'NB') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5849 |  |  |         word = word.replace('MP', 'NP') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5850 |  |  |         word = word.replace('BS', 'S') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5851 |  |  |         if word[:2] == 'PS': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5852 |  |  |             word = word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5853 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5854 |  |  |     # simple replacements | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5855 |  |  |     word = word.replace('Á', 'A') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5856 |  |  |     word = word.replace('CH', 'X') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5857 |  |  |     word = word.replace('Ç', 'S') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5858 |  |  |     word = word.replace('É', 'E') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5859 |  |  |     word = word.replace('Í', 'I') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5860 |  |  |     word = word.replace('Ó', 'O') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5861 |  |  |     word = word.replace('Ú', 'U') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5862 |  |  |     word = word.replace('Ñ', 'NY') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5863 |  |  |     word = word.replace('GÜ', 'W') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5864 |  |  |     word = word.replace('Ü', 'U') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5865 |  |  |     word = word.replace('B', 'V') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5866 |  |  |     word = word.replace('LL', 'Y') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5867 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5868 |  |  |     while len(meta_key) < maxlength: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5869 |  |  |         if pos >= len(word): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5870 |  |  |             break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5871 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5872 |  |  |         # get the next character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5873 |  |  |         current_char = word[pos] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5874 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5875 |  |  |         # if a vowel in pos 0, add to key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5876 |  |  |         if _is_vowel(pos) and pos == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5877 |  |  |             meta_key += current_char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5878 |  |  |             pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5879 |  |  |         # otherwise, do consonant rules | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5880 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5881 |  |  |             # simple consonants (unmutated) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5882 |  |  |             if current_char in {'D', 'F', 'J', 'K', 'M', 'N', 'P', 'T', 'V', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5883 |  |  |                                 'L', 'Y'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5884 |  |  |                 meta_key += current_char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5885 |  |  |                 # skip doubled consonants | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5886 |  |  |                 if word[pos+1:pos+2] == current_char: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5887 |  |  |                     pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5888 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5889 |  |  |                     pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5890 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5891 |  |  |                 if current_char == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5892 |  |  |                     # special case 'acción', 'reacción',etc. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5893 |  |  |                     if word[pos+1:pos+2] == 'C': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5894 |  |  |                         meta_key += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5895 |  |  |                         pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5896 |  |  |                     # special case 'cesar', 'cien', 'cid', 'conciencia' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5897 |  |  |                     elif word[pos+1:pos+2] in {'E', 'I'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5898 |  |  |                         meta_key += 'Z' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5899 |  |  |                         pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5900 |  |  |                     # base case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5901 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5902 |  |  |                         meta_key += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5903 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5904 |  |  |                 elif current_char == 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5905 |  |  |                     # special case 'gente', 'ecologia',etc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5906 |  |  |                     if word[pos + 1:pos + 2] in {'E', 'I'}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5907 |  |  |                         meta_key += 'J' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5908 |  |  |                         pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5909 |  |  |                     # base case | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5910 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5911 |  |  |                         meta_key += 'G' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5912 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5913 |  |  |                 elif current_char == 'H': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5914 |  |  |                     # since the letter 'H' is silent in Spanish, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5915 |  |  |                     # set the meta key to the vowel after the letter 'H' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5916 |  |  |                     if _is_vowel(pos+1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5917 |  |  |                         meta_key += word[pos+1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5918 |  |  |                         pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5919 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5920 |  |  |                         meta_key += 'H' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5921 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5922 |  |  |                 elif current_char == 'Q': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5923 |  |  |                     if word[pos+1:pos+2] == 'U': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5924 |  |  |                         pos += 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5925 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5926 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5927 |  |  |                     meta_key += 'K' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5928 |  |  |                 elif current_char == 'W': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5929 |  |  |                     meta_key += 'U' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5930 |  |  |                     pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5931 |  |  |                 elif current_char == 'R': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5932 |  |  |                     meta_key += 'R' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5933 |  |  |                     pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5934 |  |  |                 elif current_char == 'S': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5935 |  |  |                     if not _is_vowel(pos+1) and pos == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5936 |  |  |                         meta_key += 'ES' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5937 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5938 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5939 |  |  |                         meta_key += 'S' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5940 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5941 |  |  |                 elif current_char == 'Z': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5942 |  |  |                     meta_key += 'Z' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5943 |  |  |                     pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5944 |  |  |                 elif current_char == 'X': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5945 |  |  |                     if len(word) > 1 and pos == 0 and not _is_vowel(pos+1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5946 |  |  |                         meta_key += 'EX' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5947 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5948 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5949 |  |  |                         meta_key += 'X' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5950 |  |  |                         pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5951 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5952 |  |  |                     pos += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5953 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5954 |  |  |     # Final change from S to Z in modified version | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5955 |  |  |     if modified: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5956 |  |  |         meta_key = meta_key.replace('S', 'Z') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5957 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5958 |  |  |     return meta_key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5959 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5960 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5961 |  |  | def metasoundex(word, language='en'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5962 |  |  |     """Return the MetaSoundex code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5963 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5964 |  |  |     This is based on :cite:`Koneru:2017`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5965 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5966 |  |  |     :param word: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5967 |  |  |     :param language: either 'en' for English or 'es' for Spanish | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5968 |  |  |     :return: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5969 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5970 |  |  |     _metasoundex_translation = dict(zip((ord(_) for _ in | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5971 |  |  |                                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5972 |  |  |                                         '07430755015866075943077514')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5973 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5974 |  |  |     if language == 'es': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5975 |  |  |         return phonetic_spanish(spanish_metaphone(word)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5976 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5977 |  |  |     word = soundex(metaphone(word)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5978 |  |  |     word = word[0].translate(_metasoundex_translation)+word[1:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5979 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5980 |  |  |     return word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5981 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5982 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5983 |  |  | def bmpm(word, language_arg=0, name_mode='gen', match_mode='approx', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5984 |  |  |          concat=False, filter_langs=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5985 |  |  |     """Return the Beider-Morse Phonetic Matching algorithm code for a word. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5986 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5987 |  |  |     The Beider-Morse Phonetic Matching algorithm is described in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5988 |  |  |     :cite:`Beider:2008`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5989 |  |  |     The reference implementation is licensed under GPLv3. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5990 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5991 |  |  |     :param str word: the word to transform | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5992 |  |  |     :param str language_arg: the language of the term; supported values | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5993 |  |  |         include: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5994 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5995 |  |  |             - 'any' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5996 |  |  |             - 'arabic' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5997 |  |  |             - 'cyrillic' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5998 |  |  |             - 'czech' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5999 |  |  |             - 'dutch' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6000 |  |  |             - 'english' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6001 |  |  |             - 'french' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6002 |  |  |             - 'german' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6003 |  |  |             - 'greek' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6004 |  |  |             - 'greeklatin' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6005 |  |  |             - 'hebrew' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6006 |  |  |             - 'hungarian' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6007 |  |  |             - 'italian' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6008 |  |  |             - 'polish' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6009 |  |  |             - 'portuguese' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6010 |  |  |             - 'romanian' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6011 |  |  |             - 'russian' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6012 |  |  |             - 'spanish' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6013 |  |  |             - 'turkish' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6014 |  |  |             - 'germandjsg' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6015 |  |  |             - 'polishdjskp' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6016 |  |  |             - 'russiandjsre' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6017 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6018 |  |  |     :param str name_mode: the name mode of the algorithm: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6019 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6020 |  |  |             - 'gen' -- general (default) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6021 |  |  |             - 'ash' -- Ashkenazi | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6022 |  |  |             - 'sep' -- Sephardic | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6023 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6024 |  |  |     :param str match_mode: matching mode: 'approx' or 'exact' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6025 |  |  |     :param bool concat: concatenation mode | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6026 |  |  |     :param bool filter_langs: filter out incompatible languages | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6027 |  |  |     :returns: the BMPM value(s) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6028 |  |  |     :rtype: tuple | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6029 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6030 |  |  |     >>> bmpm('Christopher') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6031 |  |  |     'xrQstopir xrQstYpir xristopir xristYpir xrQstofir xrQstYfir xristofir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6032 |  |  |     xristYfir xristopi xritopir xritopi xristofi xritofir xritofi tzristopir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6033 |  |  |     tzristofir zristopir zristopi zritopir zritopi zristofir zristofi zritofir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6034 |  |  |     zritofi' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6035 |  |  |     >>> bmpm('Niall') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6036 |  |  |     'nial niol' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6037 |  |  |     >>> bmpm('Smith') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6038 |  |  |     'zmit' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6039 |  |  |     >>> bmpm('Schmidt') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6040 |  |  |     'zmit stzmit' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6041 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6042 |  |  |     >>> bmpm('Christopher', language_arg='German') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6043 |  |  |     'xrQstopir xrQstYpir xristopir xristYpir xrQstofir xrQstYfir xristofir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6044 |  |  |     xristYfir' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6045 |  |  |     >>> bmpm('Christopher', language_arg='English') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6046 |  |  |     'tzristofir tzrQstofir tzristafir tzrQstafir xristofir xrQstofir xristafir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6047 |  |  |     xrQstafir' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6048 |  |  |     >>> bmpm('Christopher', language_arg='German', name_mode='ash') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6049 |  |  |     'xrQstopir xrQstYpir xristopir xristYpir xrQstofir xrQstYfir xristofir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6050 |  |  |     xristYfir' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6051 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6052 |  |  |     >>> bmpm('Christopher', language_arg='German', match_mode='exact') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6053 |  |  |     'xriStopher xriStofer xristopher xristofer' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6054 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6055 |  |  |     return _bmpm(word, language_arg, name_mode, match_mode, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6056 |  |  |                  concat, filter_langs) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6057 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6058 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                if __name__ == '__main__':
                    # Script entry point: run every doctest embedded in this module.
                    import doctest

                    # Several expected outputs in the docstrings are wrapped across
                    # multiple lines to respect the line-length limit, while the
                    # functions return a single-line string. NORMALIZE_WHITESPACE makes
                    # any run of whitespace (including the line breaks in the expected
                    # text) compare equal to a single space, so those wrapped examples
                    # are checked on content instead of failing on layout.
                    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
            
                                                        
            
                                    
            
            
                | 6062 |  |  |  |