| Total Complexity | 74 |
| Total Lines | 476 |
| Duplicated Lines | 18.49 % |
| Changes | 0 | ||
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like abydos.phonetic.de often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
| 2 | |||
| 3 | # Copyright 2014-2018 by Christopher C. Little. |
||
| 4 | # This file is part of Abydos. |
||
| 5 | # |
||
| 6 | # Abydos is free software: you can redistribute it and/or modify |
||
| 7 | # it under the terms of the GNU General Public License as published by |
||
| 8 | # the Free Software Foundation, either version 3 of the License, or |
||
| 9 | # (at your option) any later version. |
||
| 10 | # |
||
| 11 | # Abydos is distributed in the hope that it will be useful, |
||
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 14 | # GNU General Public License for more details. |
||
| 15 | # |
||
| 16 | # You should have received a copy of the GNU General Public License |
||
| 17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
||
| 18 | |||
| 19 | """abydos.phonetic.de. |
||
| 20 | |||
| 21 | The phonetic.de module implements the Kölner Phonetik and related |
||
| 22 | algorithms for German: |
||
| 23 | |||
| 24 | - Kölner Phonetik |
||
| 25 | - Phonem |
||
| 26 | - Haase Phonetik |
||
| 27 | - Reth-Schek Phonetik |
||
| 28 | """ |
||
| 29 | |||
| 30 | from __future__ import unicode_literals |
||
| 31 | |||
| 32 | from itertools import product |
||
| 33 | from unicodedata import normalize as unicode_normalize |
||
| 34 | |||
| 35 | from six import text_type |
||
| 36 | from six.moves import range |
||
| 37 | |||
| 38 | from . import _delete_consecutive_repeats |
||
| 39 | |||
| 40 | __all__ = ['haase_phonetik', 'koelner_phonetik', |
||
| 41 | 'koelner_phonetik_alpha', 'koelner_phonetik_num_to_alpha', |
||
| 42 | 'phonem', 'reth_schek_phonetik'] |
||
| 43 | |||
| 44 | |||
def koelner_phonetik(word):
    """Encode a word with Kölner Phonetik, producing a string of digits.

    Implements the algorithm defined by :cite:`Postel:1969`.

    The code is returned as a str rather than an int, because a code may
    start with 0.

    :param str word: the word to transform
    :returns: the Kölner Phonetik value as a numeric string
    :rtype: str

    >>> koelner_phonetik('Christopher')
    '478237'
    >>> koelner_phonetik('Niall')
    '65'
    >>> koelner_phonetik('Smith')
    '862'
    >>> koelner_phonetik('Schmidt')
    '862'
    >>> koelner_phonetik('Müller')
    '657'
    >>> koelner_phonetik('Zimmermann')
    '86766'
    """
    def _prev_in(text, idx, charset):
        """Return True if the character left of text[idx] is in charset."""
        return idx > 0 and text[idx-1] in charset

    def _next_in(text, idx, charset):
        """Return True if the character right of text[idx] is in charset."""
        return idx+1 < len(text) and text[idx+1] in charset

    _alphabet = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
                 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                 'Y', 'Z'}

    # Letters whose digit does not depend on their neighbours. 'H' is
    # deliberately absent everywhere: it contributes nothing to the code.
    _fixed = {'A': '0', 'E': '0', 'I': '0', 'J': '0', 'O': '0', 'U': '0',
              'Y': '0',
              'B': '1',
              'F': '3', 'V': '3', 'W': '3',
              'G': '4', 'K': '4', 'Q': '4',
              'L': '5',
              'M': '6', 'N': '6',
              'R': '7',
              'S': '8', 'Z': '8'}

    # Followers that make 'C' code as 4; the word-initial set also has L, R
    _c_hard_initial = {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
    _c_hard_medial = {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}

    word = unicode_normalize('NFKD', text_type(word.upper()))
    for src, dst in (('ß', 'SS'), ('Ä', 'AE'), ('Ö', 'OE'), ('Ü', 'UE')):
        word = word.replace(src, dst)
    word = ''.join(ch for ch in word if ch in _alphabet)

    # No encodable letters left: the code is empty
    if not word:
        return ''

    digits = []
    for idx, ch in enumerate(word):
        if ch in _fixed:
            digits.append(_fixed[ch])
        elif ch == 'P':
            digits.append('3' if _next_in(word, idx, {'H'}) else '1')
        elif ch in {'D', 'T'}:
            digits.append('8' if _next_in(word, idx, {'C', 'S', 'Z'})
                          else '2')
        elif ch == 'C':
            if _prev_in(word, idx, {'S', 'Z'}):
                digits.append('8')
            else:
                hard = _c_hard_initial if idx == 0 else _c_hard_medial
                digits.append('4' if _next_in(word, idx, hard) else '8')
        elif ch == 'X':
            digits.append('8' if _prev_in(word, idx, {'C', 'K', 'Q'})
                          else '48')

    code = _delete_consecutive_repeats(''.join(digits))

    # Zeros are dropped everywhere except in the leading position
    if code:
        code = code[:1] + code[1:].replace('0', '')

    return code
||
| 149 | |||
| 150 | |||
def koelner_phonetik_num_to_alpha(num):
    """Translate a numeric Kölner Phonetik code into its letter form.

    Each digit 0-8 is mapped to one representative letter; any other
    character in the input is dropped.

    :param str num: a numeric Kölner Phonetik representation (can be a str or
        an int)
    :returns: an alphabetic representation of the same word
    :rtype: str

    >>> koelner_phonetik_num_to_alpha('862')
    'SNT'
    >>> koelner_phonetik_num_to_alpha('657')
    'NLR'
    >>> koelner_phonetik_num_to_alpha('86766')
    'SNRNN'
    """
    digit_to_letter = {'0': 'A', '1': 'P', '2': 'T', '3': 'F', '4': 'K',
                       '5': 'L', '6': 'N', '7': 'R', '8': 'S'}
    return ''.join(digit_to_letter[digit] for digit in text_type(num)
                   if digit in digit_to_letter)
||
| 171 | |||
| 172 | |||
def koelner_phonetik_alpha(word):
    """Encode a word with Kölner Phonetik, producing letters, not digits.

    The numeric code is computed first and then converted digit by digit to
    its alphabetic form.

    :param str word: the word to transform
    :returns: the Kölner Phonetik value as an alphabetic string
    :rtype: str

    >>> koelner_phonetik_alpha('Smith')
    'SNT'
    >>> koelner_phonetik_alpha('Schmidt')
    'SNT'
    >>> koelner_phonetik_alpha('Müller')
    'NLR'
    >>> koelner_phonetik_alpha('Zimmermann')
    'SNRNN'
    """
    numeric_code = koelner_phonetik(word)
    return koelner_phonetik_num_to_alpha(numeric_code)
||
| 190 | |||
| 191 | |||
def phonem(word):
    """Compute the Phonem code of a word.

    Phonem is defined in :cite:`Wilde:1988`.

    This version is based on the Perl implementation documented at
    :cite:`Wilz:2005`.
    It includes some enhancements presented in the Java port at
    :cite:`dcm4che:2011`.

    Phonem is intended chiefly for German names/words.

    :param str word: the word to transform
    :returns: the Phonem value
    :rtype: str

    >>> phonem('Christopher')
    'CRYSDOVR'
    >>> phonem('Niall')
    'NYAL'
    >>> phonem('Smith')
    'SMYD'
    >>> phonem('Schmidt')
    'CMYD'
    """
    # Two-letter rewrites, applied in this order over the whole word.
    # '§' is an intermediate marker that the single-character table below
    # turns into 'U'.
    digraph_rules = (('SC', 'C'), ('SZ', 'C'), ('CZ', 'C'),
                     ('TZ', 'C'), ('TS', 'C'), ('KS', 'X'),
                     ('PF', 'V'), ('QU', 'KW'), ('PH', 'V'),
                     ('UE', 'Y'), ('AE', 'E'), ('OE', 'Ö'),
                     ('EI', 'AY'), ('EY', 'AY'), ('EU', 'OY'),
                     ('AU', 'A§'), ('OU', '§'))

    # Single-character rewrites: the n-th source maps to the n-th target
    char_sources = 'ZKGQÇÑßFWPTÁÀÂÃÅÄÆÉÈÊËIJÌÍÎÏÜݧÚÙÛÔÒÓÕØ'
    char_targets = 'CCCCCNSVVBDAAAAAEEEEEEYYYYYYYYUUUUOOOOÖ'
    char_map = {ord(src): dst for src, dst in zip(char_sources, char_targets)}

    # Only these characters may appear in the final code
    kept = {'A', 'B', 'C', 'D', 'L', 'M', 'N', 'O', 'R', 'S',
            'U', 'V', 'W', 'X', 'Y', 'Ö'}

    text = unicode_normalize('NFC', text_type(word.upper()))
    for old, new in digraph_rules:
        text = text.replace(old, new)
    text = text.translate(char_map)

    collapsed = _delete_consecutive_repeats(text)
    return ''.join(ch for ch in collapsed if ch in kept)
||
| 235 | |||
| 236 | |||
def haase_phonetik(word, primary_only=False):
    """Compute the Haase Phonetik (numeric output) codes of a word.

    Based on the algorithm described at :cite:`Prante:2015`.

    Based on the original :cite:`Haase:2000`.

    Each code consists of digits but is nevertheless a str.

    :param str word: the word to transform
    :param bool primary_only: if True, only the word as given is encoded and
        no alternative spellings are generated
    :returns: the distinct Haase Phonetik codes, in the order they were
        first produced
    :rtype: tuple

    >>> haase_phonetik('Joachim')
    ('9496',)
    >>> haase_phonetik('Christoph')
    ('4798293', '8798293')
    >>> haase_phonetik('Jörg')
    ('974',)
    >>> haase_phonetik('Smith')
    ('8692',)
    >>> haase_phonetik('Schmidt')
    ('8692', '4692')
    """
    def _prev_in(text, idx, charset):
        """Return True if the character left of text[idx] is in charset."""
        return idx > 0 and text[idx-1] in charset

    def _next_in(text, idx, charset):
        """Return True if the character right of text[idx] is in charset."""
        return idx+1 < len(text) and text[idx+1] in charset

    _alphabet = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
                 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                 'Y', 'Z'}

    # Letters whose digit does not depend on their neighbours. 'H' is
    # deliberately absent everywhere: it contributes nothing to the code.
    _fixed = {'A': '9', 'E': '9', 'I': '9', 'J': '9', 'O': '9', 'U': '9',
              'Y': '9',
              'B': '1',
              'F': '3', 'V': '3', 'W': '3',
              'G': '4', 'K': '4', 'Q': '4',
              'L': '5',
              'M': '6', 'N': '6',
              'R': '7',
              'S': '8', 'Z': '8'}

    # Followers that make 'C' code as 4; the word-initial set also has L, R
    _c_hard_initial = {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
    _c_hard_medial = {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}

    # Three-letter sequences and their alternative spelling
    _trigram_alts = {'OWN': 'AUN', 'WSK': 'RSK', 'SCH': 'CH', 'GLI': 'LI',
                     'AUX': 'O', 'EUX': 'O'}

    def _encode(spelling):
        """Return the digit code of one spelling, repeats collapsed."""
        digits = []
        for idx, ch in enumerate(spelling):
            if ch in _fixed:
                digits.append(_fixed[ch])
            elif ch == 'P':
                digits.append('3' if _next_in(spelling, idx, {'H'}) else '1')
            elif ch in {'D', 'T'}:
                digits.append('8' if _next_in(spelling, idx, {'C', 'S', 'Z'})
                              else '2')
            elif ch == 'C':
                if _prev_in(spelling, idx, {'S', 'Z'}):
                    digits.append('8')
                else:
                    hard = _c_hard_initial if idx == 0 else _c_hard_medial
                    digits.append('4' if _next_in(spelling, idx, hard)
                                  else '8')
            elif ch == 'X':
                digits.append('8' if _prev_in(spelling, idx, {'C', 'K', 'Q'})
                              else '48')
        return _delete_consecutive_repeats(''.join(digits))

    word = unicode_normalize('NFKD', text_type(word.upper()))
    for src, dst in (('ß', 'SS'), ('Ä', 'AE'), ('Ö', 'OE'), ('Ü', 'UE')):
        word = word.replace(src, dst)
    word = ''.join(ch for ch in word if ch in _alphabet)

    if primary_only:
        spellings = [word]
    else:
        # Split the word into segments, each a tuple of interchangeable
        # spellings; the cartesian product then yields every variant.
        choices = []
        cursor = 0
        if word.startswith('CH'):
            choices.append(('CH', 'SCH'))
            cursor = 2
        while cursor < len(word):
            rest = word[cursor:]
            if rest.startswith('ILLE'):
                option, step = ('ILLE', 'I'), 4
            elif rest[:3] in _trigram_alts:
                option, step = (rest[:3], _trigram_alts[rest[:3]]), 3
            elif rest.startswith('RB'):
                option, step = ('RB', 'RW'), 2
            elif rest == 'EAU':  # only as the word's ending
                option, step = ('EAU', 'O'), 3
            elif rest == 'O':  # only as the word's last letter
                option, step = ('O', 'OW'), 1
            elif rest == 'A':  # only as the word's last letter
                option, step = ('A', 'AR'), 1
            else:
                option, step = (rest[0],), 1
            choices.append(option)
            cursor += step

        spellings = [''.join(parts) for parts in product(*choices)]

    # Encode every spelling, keeping the first occurrence of each code
    seen = set()
    unique_codes = []
    for spelling in spellings:
        code = _encode(spelling)
        if code not in seen:
            seen.add(code)
            unique_codes.append(code)

    return tuple(unique_codes)
||
| 385 | |||
| 386 | |||
def reth_schek_phonetik(word):
    """Compute the Reth-Schek Phonetik code of a word.

    This algorithm is proposed in :cite:`Reth:1977`.

    Since I couldn't secure a copy of that document (maybe I'll look for it
    next time I'm in Germany), this implementation is based on what I could
    glean from the implementations published by German Record Linkage
    Center (www.record-linkage.de):

    - Privacy-preserving Record Linkage (PPRL) (in R) :cite:`Rukasz:2018`
    - Merge ToolBox (in Java) :cite:`Schnell:2004`

    Rules that are unclear:

    - Should 'C' become 'G' or 'Z'? (PPRL has both, 'Z' rule blocked)
    - Should 'CC' become 'G'? (PPRL has blocked 'CK' that may be typo)
    - Should 'TUI' -> 'ZUI' rule exist? (PPRL has rule, but I can't
      think of a German word with '-tui-' in it.)
    - Should we really change 'SCH' -> 'CH' and then 'CH' -> 'SCH'?

    :param str word: the word to transform
    :returns: the Reth-Schek Phonetik code
    :rtype: str

    >>> reth_schek_phonetik('Joachim')
    'JOAGHIM'
    >>> reth_schek_phonetik('Christoph')
    'GHRISDOF'
    >>> reth_schek_phonetik('Jörg')
    'JOERG'
    >>> reth_schek_phonetik('Smith')
    'SMID'
    >>> reth_schek_phonetik('Schmidt')
    'SCHMID'
    """
    # Rewrite tables paired with their pattern length, longest first, so a
    # longer pattern always wins over a shorter one at the same position.
    rules_by_width = (
        (3, {'AEH': 'E', 'IEH': 'I', 'OEH': 'OE', 'UEH': 'UE',
             'SCH': 'CH', 'ZIO': 'TIO', 'TIU': 'TIO', 'ZIU': 'TIO',
             'CHS': 'X', 'CKS': 'X', 'AEU': 'OI'}),
        (2, {'LL': 'L', 'AA': 'A', 'AH': 'A', 'BB': 'B', 'PP': 'B',
             'BP': 'B', 'PB': 'B', 'DD': 'D', 'DT': 'D', 'TT': 'D',
             'TH': 'D', 'EE': 'E', 'EH': 'E', 'AE': 'E', 'FF': 'F',
             'PH': 'F', 'KK': 'K', 'GG': 'G', 'GK': 'G', 'KG': 'G',
             'CK': 'G', 'CC': 'C', 'IE': 'I', 'IH': 'I', 'MM': 'M',
             'NN': 'N', 'OO': 'O', 'OH': 'O', 'SZ': 'S', 'UH': 'U',
             'GS': 'X', 'KS': 'X', 'TZ': 'Z', 'AY': 'AI',
             'EI': 'AI', 'EY': 'AI', 'EU': 'OI', 'RR': 'R',
             'SS': 'S', 'KW': 'QU'}),
        (1, {'P': 'B', 'T': 'D', 'V': 'F', 'W': 'F', 'C': 'G',
             'K': 'G', 'Y': 'I'}),
    )

    # Uppercase, then spell out umlauts and eszett
    word = word.upper()
    for special, expansion in (('Ä', 'AE'), ('Ö', 'OE'), ('Ü', 'UE'),
                               ('ß', 'SS')):
        word = word.replace(special, expansion)

    # Scan left to right; at most one rewrite is applied per position and
    # the scan moves on by one character whether or not anything matched.
    idx = 0
    while idx < len(word):
        for width, table in rules_by_width:
            chunk = word[idx:idx+width]
            if chunk in table:
                word = word[:idx] + table[chunk] + word[idx+width:]
                break
        idx += 1

    # Change 'CH' back(?) to 'SCH'
    word = word.replace('CH', 'SCH')

    # Shorten final sequences; only the first matching rule applies
    ending = word[-2:]
    if ending == 'ER':
        return word[:-2]+'R'
    if ending == 'EL':
        return word[:-2]+'L'
    if word[-1:] == 'H':
        return word[:-1]
    return word
||
| 471 | |||
| 472 | |||
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    from doctest import testmod
    testmod()
||
| 476 |