abydos.phonetic.sv.sfinxbis() - Code Metrics - Inspection of "Merge pull request #120 from chrislit/modularize" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Push — master ( 64abe2...a464fa )

by Chris

created 2018-10-19 22:32 UTC

abydos.phonetic.sv.sfinxbis() F

↳ Parent: abydos.phonetic.sv

Complexity

Conditions

Size

Total Lines	174
Code Lines	103

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	32
eloc	103
nop	2
dl	0
loc	174
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic.sv.

The phonetic.sv module implements phonetic algorithms for Scandinavian names
& languages (currently Swedish & Norwegian), including:

    - SfinxBis
    - Norphone
"""

from __future__ import unicode_literals

from unicodedata import normalize as unicode_normalize

from six import text_type

from . import _delete_consecutive_repeats

__all__ = ['norphone', 'sfinxbis']


def sfinxbis(word, max_length=-1):
    """Return the SfinxBis code for a word.

    SfinxBis is a Soundex-like algorithm defined in :cite:`Axelsson:2009`.

    This implementation follows the reference implementation:
    :cite:`Sjoo:2009`.

    SfinxBis is intended chiefly for Swedish names.

    The input is upper-cased, stripped of noble-title particles and split on
    whitespace (hyphens count as whitespace); one code is produced per
    remaining name part.

    All letter-by-letter rewrite rules are applied in a fixed order, so the
    result for a given input is the same on every run regardless of string
    hash randomization.

    :param str word: the word to transform
    :param int max_length: the length of the code returned (defaults to
        unlimited); values of 0 or less disable truncation
    :returns: the SfinxBis value, one code per name part; ``('',)`` if no
        name part remains after the noble-title particles are removed
    :rtype: tuple

    >>> sfinxbis('Christopher')
    ('K68376',)
    >>> sfinxbis('Niall')
    ('N4',)
    >>> sfinxbis('Smith')
    ('S53',)
    >>> sfinxbis('Schmidt')
    ('S53',)

    >>> sfinxbis('Johansson')
    ('J585',)
    >>> sfinxbis('Sjöberg')
    ('#162',)
    """
    # Noble-title particles ("adelstitler"). Multi-word particles come first
    # so that they are removed before their shorter components.
    adelstitler = (' DE LA ', ' DE LAS ', ' DE LOS ', ' VAN DE ', ' VAN DEN ',
                   ' VAN DER ', ' VON DEM ', ' VON DER ',
                   ' AF ', ' AV ', ' DA ', ' DE ', ' DEL ', ' DEN ', ' DES ',
                   ' DI ', ' DO ', ' DON ', ' DOS ', ' DU ', ' E ', ' IN ',
                   ' LA ', ' LE ', ' MAC ', ' MC ', ' VAN ', ' VON ', ' Y ',
                   ' S:T ')

    # Ordered tuples drive every loop that rewrites the word: the chained
    # replacements below can feed into each other (e.g. 'EIY'), so iterating
    # over a set would make the outcome depend on the set's hash order.
    _harde_vokaler_seq = ('A', 'O', 'U', 'Å')
    _mjuka_vokaler_seq = ('E', 'I', 'Y', 'Ä', 'Ö')
    _konsonanter_seq = ('B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N',
                        'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z')

    # Frozensets are used for membership tests only.
    _harde_vokaler = frozenset(_harde_vokaler_seq)
    _mjuka_vokaler = frozenset(_mjuka_vokaler_seq)
    _konsonanter = frozenset(_konsonanter_seq)
    _vokaler = _mjuka_vokaler | _harde_vokaler
    _alfabet = frozenset(('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
                          'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
                          'U', 'V', 'W', 'X', 'Y', 'Z', 'Ä', 'Å', 'Ö'))

    # Letter -> digit table for the tail of the name; every vowel maps to
    # '9', which is stripped again in step 11.
    _sfinxbis_translation = dict(zip((ord(_) for _ in
                                      'BCDFGHJKLMNPQRSTVZAOUÅEIYÄÖ'),
                                     '123729224551268378999999999'))

    # Foreign / accented letter -> Swedish letter table.
    _sfinxbis_substitutions = dict(zip((ord(_) for _ in
                                        'WZÀÁÂÃÆÇÈÉÊËÌÍÎÏÑÒÓÔÕØÙÚÛÜÝ'),
                                       'VSAAAAÄCEEEEIIIINOOOOÖUUUYY'))

    def _foersvensker(lokal_ordet):
        """Return the Swedish-ized form of the word."""
        lokal_ordet = lokal_ordet.replace('STIERN', 'STJÄRN')
        lokal_ordet = lokal_ordet.replace('HIE', 'HJ')
        lokal_ordet = lokal_ordet.replace('SIÖ', 'SJÖ')
        lokal_ordet = lokal_ordet.replace('SCH', 'SH')
        lokal_ordet = lokal_ordet.replace('QU', 'KV')
        lokal_ordet = lokal_ordet.replace('IO', 'JO')
        lokal_ordet = lokal_ordet.replace('PH', 'F')

        # Vowel followed by Ü/Y/I -> vowel followed by J. Hard vowels are
        # handled before soft ones, each in its declared order.
        for vokal in _harde_vokaler_seq + _mjuka_vokaler_seq:
            lokal_ordet = lokal_ordet.replace(vokal+'Ü', vokal+'J')
            lokal_ordet = lokal_ordet.replace(vokal+'Y', vokal+'J')
            lokal_ordet = lokal_ordet.replace(vokal+'I', vokal+'J')

        # Drop an H that directly precedes a consonant.
        if 'H' in lokal_ordet:
            for konsonant in _konsonanter_seq:
                lokal_ordet = lokal_ordet.replace('H'+konsonant, konsonant)

        lokal_ordet = lokal_ordet.translate(_sfinxbis_substitutions)

        lokal_ordet = lokal_ordet.replace('Ð', 'ETH')
        lokal_ordet = lokal_ordet.replace('Þ', 'TH')
        lokal_ordet = lokal_ordet.replace('ß', 'SS')

        return lokal_ordet

    def _koda_foersta_ljudet(lokal_ordet):
        """Return the word with the first sound coded.

        The leading sound is collapsed to a single symbol: '$' for any
        vowel, '#' for the sje/tje sounds, otherwise a representative
        consonant. Words matching no rule are returned unchanged. The
        order of the branches is significant.
        """
        if lokal_ordet[0:1] in _vokaler:
            lokal_ordet = '$' + lokal_ordet[1:]
        elif lokal_ordet[0:2] in ('DJ', 'GJ', 'HJ', 'LJ'):
            lokal_ordet = 'J' + lokal_ordet[2:]
        elif lokal_ordet[0:1] == 'G' and lokal_ordet[1:2] in _mjuka_vokaler:
            lokal_ordet = 'J' + lokal_ordet[1:]
        elif lokal_ordet[0:1] == 'Q':
            lokal_ordet = 'K' + lokal_ordet[1:]
        elif lokal_ordet[0:2] == 'CH' and lokal_ordet[2:3] in _vokaler:
            lokal_ordet = '#' + lokal_ordet[2:]
        elif lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in _harde_vokaler:
            lokal_ordet = 'K' + lokal_ordet[1:]
        elif lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in _konsonanter:
            lokal_ordet = 'K' + lokal_ordet[1:]
        elif lokal_ordet[0:1] == 'X':
            lokal_ordet = 'S' + lokal_ordet[1:]
        elif lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in _mjuka_vokaler:
            lokal_ordet = 'S' + lokal_ordet[1:]
        elif lokal_ordet[0:3] in ('SKJ', 'STJ', 'SCH'):
            lokal_ordet = '#' + lokal_ordet[3:]
        elif lokal_ordet[0:2] in ('SH', 'KJ', 'TJ', 'SJ'):
            lokal_ordet = '#' + lokal_ordet[2:]
        elif lokal_ordet[0:2] == 'SK' and lokal_ordet[2:3] in _mjuka_vokaler:
            lokal_ordet = '#' + lokal_ordet[2:]
        elif lokal_ordet[0:1] == 'K' and lokal_ordet[1:2] in _mjuka_vokaler:
            lokal_ordet = '#' + lokal_ordet[1:]
        return lokal_ordet

    # Step 1: upper-case and normalize to composed (NFC) form
    word = unicode_normalize('NFC', text_type(word.upper()))
    word = word.replace('ß', 'SS')
    word = word.replace('-', ' ')

    # Step 2: remove noble-title particles, both inside the name
    # (space-delimited) and at its very start (no leading space)
    for adelstitel in adelstitler:
        while adelstitel in word:
            word = word.replace(adelstitel, ' ')
        if word.startswith(adelstitel[1:]):
            word = word[len(adelstitel)-1:]

    # Split the name into its parts
    ordlista = word.split()

    # Step 3: collapse runs of repeated characters in each part
    ordlista = [_delete_consecutive_repeats(ordet) for ordet in ordlista]
    if not ordlista:
        # noinspection PyRedundantParentheses
        return ('',)

    # Step 4: Swedish-ize the spelling
    ordlista = [_foersvensker(ordet) for ordet in ordlista]

    # Step 5: drop every character that is not in A-Ö
    ordlista = [''.join(c for c in ordet if c in _alfabet)
                for ordet in ordlista]

    # Step 6: code the first sound
    ordlista = [_koda_foersta_ljudet(ordet) for ordet in ordlista]

    # Step 7: split each part into its first symbol and the remainder
    rest = [ordet[1:] for ordet in ordlista]

    # Step 8: phonetic transformation of the remainder
    rest = [ordet.replace('DT', 'T') for ordet in rest]
    rest = [ordet.replace('X', 'KS') for ordet in rest]

    # Step 9: code the remainder as digits; C before a soft vowel sounds
    # like S, so it is coded '8' before the generic table is applied
    for vokal in _mjuka_vokaler_seq:
        rest = [ordet.replace('C'+vokal, '8'+vokal) for ordet in rest]
    rest = [ordet.translate(_sfinxbis_translation) for ordet in rest]

    # Step 10: collapse adjacent duplicate digits
    rest = [_delete_consecutive_repeats(ordet) for ordet in rest]

    # Step 11: remove every '9' (the vowel placeholder)
    rest = [ordet.replace('9', '') for ordet in rest]

    # Step 12: join first symbol and coded remainder again
    ordlista = [ordet[0:1] + svans for ordet, svans in zip(ordlista, rest)]

    # truncate, if max_length is set
    if max_length > 0:
        ordlista = [ordet[:max_length] for ordet in ordlista]

    return tuple(ordlista)


def norphone(word):
    """Encode a word with the Norphone phonetic algorithm.

    The reference implementation by Lars Marius Garshol is available in
    :cite:`Garshol:2015`.

    Norphone targets Norwegian; this implementation additionally accepts the
    Swedish vowels Ä and Ö, and incorporates the "not implemented" rules
    from the rule set in the file cited above.

    The word is upper-cased, a word-initial spelling may be mapped to a
    single sound, letter groups are substituted longest-first, vowels are
    dropped everywhere except in first position, and runs of identical
    code characters are finally collapsed.

    :param str word: the word to transform
    :returns: the Norphone code
    :rtype: str

    >>> norphone('Hansen')
    'HNSN'
    >>> norphone('Larsen')
    'LRSN'
    >>> norphone('Aagaard')
    'ÅKRT'
    >>> norphone('Braaten')
    'BRTN'
    >>> norphone('Sandvik')
    'SNVK'
    """
    vowels = frozenset(('A', 'E', 'I', 'O', 'U', 'Y',
                        'Å', 'Æ', 'Ø', 'Ä', 'Ö'))

    # Word-initial spellings and the sound each contributes to the code.
    # No prefix is a prefix of another, so at most one entry can match.
    initial_rules = (('AA', 'Å'), ('GI', 'J'), ('SKY', 'X'), ('EI', 'Æ'),
                     ('KY', 'X'), ('C', 'K'), ('Ä', 'Æ'), ('Ö', 'Ø'))

    # Letter-group substitutions, tried from the longest group down.
    substitutions = (
        (4, {'SKEI': 'X'}),
        (3, {'SKJ': 'X', 'KEI': 'X'}),
        (2, {'CH': 'K', 'CK': 'K', 'GJ': 'J', 'GH': 'K', 'HG': 'K',
             'HJ': 'J', 'HL': 'L', 'HR': 'R', 'KJ': 'X', 'KI': 'X',
             'LD': 'L', 'ND': 'N', 'PH': 'F', 'TH': 'T', 'SJ': 'X'}),
        (1, {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'}),
    )

    word = word.upper()

    pieces = []
    pos = 0

    # A matching initial spelling emits its sound, and scanning resumes
    # right after the letters it consumed.
    for prefix, sound in initial_rules:
        if word.startswith(prefix):
            pieces.append(sound)
            pos = len(prefix)
            break

    if word.endswith('DT'):
        word = word[:-2] + 'T'
    # Though the rules indicate this rule applies in all positions, the
    # reference implementation indicates it applies only in final position.
    elif word[-2:-1] in vowels and word.endswith('D'):
        word = word[:-2]

    end = len(word)
    while pos < end:
        for size, table in substitutions:
            chunk = word[pos:pos+size]
            if chunk in table:
                pieces.append(table[chunk])
                pos += size
                break
        else:
            # No substitution applies: keep the letter unless it is a
            # vowel in a non-initial position.
            letter = word[pos]
            if pos == 0 or letter not in vowels:
                pieces.append(letter)
            pos += 1

    return _delete_consecutive_repeats(''.join(pieces))


if __name__ == '__main__':
    # Executed as a script: run the examples embedded in the docstrings.
    from doctest import testmod
    testmod()


1			# -*- coding: utf-8 -*-
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19			"""abydos.phonetic.sv.
20
21			The phonetic.sv module implements phonetic algorithms for Scandinavian names
22			& languages (currently Swedish & Norwegian), including:
23
24			- SfinxBis
25			- Norphone
26			"""
27
28			from __future__ import unicode_literals
29
30			from unicodedata import normalize as unicode_normalize
31
32			from six import text_type
33
34			from . import _delete_consecutive_repeats
35
36			__all__ = ['norphone', 'sfinxbis']
37
38
39			def sfinxbis(word, max_length=-1):
40			"""Return the SfinxBis code for a word.
41
42			SfinxBis is a Soundex-like algorithm defined in :cite:`Axelsson:2009`.
43
44			This implementation follows the reference implementation:
45			:cite:`Sjoo:2009`.
46
47			SfinxBis is intended chiefly for Swedish names.
48
49			:param str word: the word to transform
50			:param int max_length: the length of the code returned (defaults to
51			unlimited)
52			:returns: the SfinxBis value
53			:rtype: tuple
54
55			>>> sfinxbis('Christopher')
56			('K68376',)
57			>>> sfinxbis('Niall')
58			('N4',)
59			>>> sfinxbis('Smith')
60			('S53',)
61			>>> sfinxbis('Schmidt')
62			('S53',)
63
64			>>> sfinxbis('Johansson')
65			('J585',)
66			>>> sfinxbis('Sjöberg')
67			('#162',)
68			"""
69			adelstitler = (' DE LA ', ' DE LAS ', ' DE LOS ', ' VAN DE ', ' VAN DEN ',
70			' VAN DER ', ' VON DEM ', ' VON DER ',
71			' AF ', ' AV ', ' DA ', ' DE ', ' DEL ', ' DEN ', ' DES ',
72			' DI ', ' DO ', ' DON ', ' DOS ', ' DU ', ' E ', ' IN ',
73			' LA ', ' LE ', ' MAC ', ' MC ', ' VAN ', ' VON ', ' Y ',
74			' S:T ')
75
76			_harde_vokaler = {'A', 'O', 'U', 'Å'}
77			_mjuka_vokaler = {'E', 'I', 'Y', 'Ä', 'Ö'}
78			_konsonanter = {'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P',
79			'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z'}
80			_alfabet = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
81			'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
82			'Y', 'Z', 'Ä', 'Å', 'Ö'}
83
84			_sfinxbis_translation = dict(zip((ord(_) for _ in
			0 ignored issues – show Comprehensibility Best Practice introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
85			'BCDFGHJKLMNPQRSTVZAOUÅEIYÄÖ'),
86			'123729224551268378999999999'))
87
88			_sfinxbis_substitutions = dict(zip((ord(_) for _ in
89			'WZÀÁÂÃÆÇÈÉÊËÌÍÎÏÑÒÓÔÕØÙÚÛÜÝ'),
90			'VSAAAAÄCEEEEIIIINOOOOÖUUUYY'))
91
92			def _foersvensker(lokal_ordet):
93			"""Return the Swedish-ized form of the word."""
94			lokal_ordet = lokal_ordet.replace('STIERN', 'STJÄRN')
95			lokal_ordet = lokal_ordet.replace('HIE', 'HJ')
96			lokal_ordet = lokal_ordet.replace('SIÖ', 'SJÖ')
97			lokal_ordet = lokal_ordet.replace('SCH', 'SH')
98			lokal_ordet = lokal_ordet.replace('QU', 'KV')
99			lokal_ordet = lokal_ordet.replace('IO', 'JO')
100			lokal_ordet = lokal_ordet.replace('PH', 'F')
101
102			for i in _harde_vokaler:
103			lokal_ordet = lokal_ordet.replace(i+'Ü', i+'J')
104			lokal_ordet = lokal_ordet.replace(i+'Y', i+'J')
105			lokal_ordet = lokal_ordet.replace(i+'I', i+'J')
106			for i in _mjuka_vokaler:
107			lokal_ordet = lokal_ordet.replace(i+'Ü', i+'J')
108			lokal_ordet = lokal_ordet.replace(i+'Y', i+'J')
109			lokal_ordet = lokal_ordet.replace(i+'I', i+'J')
110
111			if 'H' in lokal_ordet:
112			for i in _konsonanter:
113			lokal_ordet = lokal_ordet.replace('H'+i, i)
114
115			lokal_ordet = lokal_ordet.translate(_sfinxbis_substitutions)
116
117			lokal_ordet = lokal_ordet.replace('Ð', 'ETH')
118			lokal_ordet = lokal_ordet.replace('Þ', 'TH')
119			lokal_ordet = lokal_ordet.replace('ß', 'SS')
120
121			return lokal_ordet
122
123			def _koda_foersta_ljudet(lokal_ordet):
124			"""Return the word with the first sound coded."""
125			if (lokal_ordet[0:1] in _mjuka_vokaler or
126			lokal_ordet[0:1] in _harde_vokaler):
127			lokal_ordet = '$' + lokal_ordet[1:]
128			elif lokal_ordet[0:2] in ('DJ', 'GJ', 'HJ', 'LJ'):
129			lokal_ordet = 'J' + lokal_ordet[2:]
130			elif lokal_ordet[0:1] == 'G' and lokal_ordet[1:2] in _mjuka_vokaler:
131			lokal_ordet = 'J' + lokal_ordet[1:]
132			elif lokal_ordet[0:1] == 'Q':
133			lokal_ordet = 'K' + lokal_ordet[1:]
134			elif (lokal_ordet[0:2] == 'CH' and
135			lokal_ordet[2:3] in frozenset(_mjuka_vokaler | _harde_vokaler)):
136			lokal_ordet = '#' + lokal_ordet[2:]
137			elif lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in _harde_vokaler:
138			lokal_ordet = 'K' + lokal_ordet[1:]
139			elif lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in _konsonanter:
140			lokal_ordet = 'K' + lokal_ordet[1:]
141			elif lokal_ordet[0:1] == 'X':
142			lokal_ordet = 'S' + lokal_ordet[1:]
143			elif lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in _mjuka_vokaler:
144			lokal_ordet = 'S' + lokal_ordet[1:]
145			elif lokal_ordet[0:3] in ('SKJ', 'STJ', 'SCH'):
146			lokal_ordet = '#' + lokal_ordet[3:]
147			elif lokal_ordet[0:2] in ('SH', 'KJ', 'TJ', 'SJ'):
148			lokal_ordet = '#' + lokal_ordet[2:]
149			elif lokal_ordet[0:2] == 'SK' and lokal_ordet[2:3] in _mjuka_vokaler:
150			lokal_ordet = '#' + lokal_ordet[2:]
151			elif lokal_ordet[0:1] == 'K' and lokal_ordet[1:2] in _mjuka_vokaler:
152			lokal_ordet = '#' + lokal_ordet[1:]
153			return lokal_ordet
154
155			# Steg 1, Versaler
156			word = unicode_normalize('NFC', text_type(word.upper()))
157			word = word.replace('ß', 'SS')
158			word = word.replace('-', ' ')
159
160			# Steg 2, Ta bort adelsprefix
161			for adelstitel in adelstitler:
162			while adelstitel in word:
163			word = word.replace(adelstitel, ' ')
164			if word.startswith(adelstitel[1:]):
165			word = word[len(adelstitel)-1:]
166
167			# Split word into tokens
168			ordlista = word.split()
169
170			# Steg 3, Ta bort dubbelteckning i början på namnet
171			ordlista = [_delete_consecutive_repeats(ordet) for ordet in ordlista]
172			if not ordlista:
173			# noinspection PyRedundantParentheses
174			return ('',)
175
176			# Steg 4, Försvenskning
177			ordlista = [_foersvensker(ordet) for ordet in ordlista]
178
179			# Steg 5, Ta bort alla tecken som inte är A-Ö (65-90,196,197,214)
180			ordlista = [''.join(c for c in ordet if c in _alfabet)
181			for ordet in ordlista]
182
183			# Steg 6, Koda första ljudet
184			ordlista = [_koda_foersta_ljudet(ordet) for ordet in ordlista]
185
186			# Steg 7, Dela upp namnet i två delar
187			rest = [ordet[1:] for ordet in ordlista]
188
189			# Steg 8, Utför fonetisk transformation i resten
190			rest = [ordet.replace('DT', 'T') for ordet in rest]
191			rest = [ordet.replace('X', 'KS') for ordet in rest]
192
193			# Steg 9, Koda resten till en sifferkod
194			for vokal in _mjuka_vokaler:
195			rest = [ordet.replace('C'+vokal, '8'+vokal) for ordet in rest]
196			rest = [ordet.translate(_sfinxbis_translation) for ordet in rest]
197
198			# Steg 10, Ta bort intilliggande dubbletter
199			rest = [_delete_consecutive_repeats(ordet) for ordet in rest]
200
201			# Steg 11, Ta bort alla "9"
202			rest = [ordet.replace('9', '') for ordet in rest]
203
204			# Steg 12, Sätt ihop delarna igen
205			ordlista = [''.join(ordet) for ordet in
206			zip((_[0:1] for _ in ordlista), rest)]
207
208			# truncate, if max_length is set
209			if max_length > 0:
210			ordlista = [ordet[:max_length] for ordet in ordlista]
211
212			return tuple(ordlista)
213
214
215			def norphone(word):
216			"""Return the Norphone code.
217
218			The reference implementation by Lars Marius Garshol is available in
219			:cite:`Garshol:2015`.
220
221			Norphone was designed for Norwegian, but this implementation has been
222			extended to support Swedish vowels as well. This function incorporates
223			the "not implemented" rules from the above file's rule set.
224
225			:param str word: the word to transform
226			:returns: the Norphone code
227			:rtype: str
228
229			>>> norphone('Hansen')
230			'HNSN'
231			>>> norphone('Larsen')
232			'LRSN'
233			>>> norphone('Aagaard')
234			'ÅKRT'
235			>>> norphone('Braaten')
236			'BRTN'
237			>>> norphone('Sandvik')
238			'SNVK'
239			"""
240			_vowels = {'A', 'E', 'I', 'O', 'U', 'Y', 'Å', 'Æ', 'Ø', 'Ä', 'Ö'}
241
242			replacements = {4: {'SKEI': 'X'},
243			3: {'SKJ': 'X', 'KEI': 'X'},
244			2: {'CH': 'K', 'CK': 'K', 'GJ': 'J', 'GH': 'K', 'HG': 'K',
245			'HJ': 'J', 'HL': 'L', 'HR': 'R', 'KJ': 'X', 'KI': 'X',
246			'LD': 'L', 'ND': 'N', 'PH': 'F', 'TH': 'T', 'SJ': 'X'},
247			1: {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'}}
248
249			word = word.upper()
250
251			code = ''
252			skip = 0
253
254			if word[0:2] == 'AA':
255			code = 'Å'
256			skip = 2
257			elif word[0:2] == 'GI':
258			code = 'J'
259			skip = 2
260			elif word[0:3] == 'SKY':
261			code = 'X'
262			skip = 3
263			elif word[0:2] == 'EI':
264			code = 'Æ'
265			skip = 2
266			elif word[0:2] == 'KY':
267			code = 'X'
268			skip = 2
269			elif word[:1] == 'C':
270			code = 'K'
271			skip = 1
272			elif word[:1] == 'Ä':
273			code = 'Æ'
274			skip = 1
275			elif word[:1] == 'Ö':
276			code = 'Ø'
277			skip = 1
278
279			if word[-2:] == 'DT':
280			word = word[:-2]+'T'
281			# Though the rules indicate this rule applies in all positions, the
282			# reference implementation indicates it applies only in final position.
283			elif word[-2:-1] in _vowels and word[-1:] == 'D':
284			word = word[:-2]
285
286			for pos, char in enumerate(word):
287			if skip:
288			skip -= 1
289			else:
290			for length in sorted(replacements, reverse=True):
291			if word[pos:pos+length] in replacements[length]:
292			code += replacements[length][word[pos:pos+length]]
293			skip = length-1
294			break
295			else:
296			if not pos or char not in _vowels:
297			code += char
298
299			code = _delete_consecutive_repeats(code)
300
301			return code
302
303
304			if __name__ == '__main__':
305			import doctest
306			doctest.testmod()
307

chrislit / abydos

Push — master ( 64abe2...a464fa )

abydos.phonetic.sv.sfinxbis() F

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like