abydos.phonetic.nysiis.nysiis() - Code Metrics - Inspection of "Merge pull request #120 from chrislit/modularize" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Push — master ( 64abe2...a464fa )

by Chris

created 2018-10-19 22:32 UTC

abydos.phonetic.nysiis.nysiis() F

↳ Parent: abydos.phonetic.nysiis

Complexity

Conditions

Size

Total Lines	163
Code Lines	112

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	66
eloc	112
nop	3
dl	0
loc	163
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic.nysiis.

The phonetic.nysiis module implements New York State Identification and
Intelligence System (NYSIIS) phonetic encoding.
"""

from __future__ import unicode_literals

from six.moves import range

from . import _delete_consecutive_repeats

__all__ = ['nysiis']


# Vowels recognised by NYSIIS; 'Y' is handled separately by the modified
# variant.
_NYSIIS_VOWELS = frozenset('AEIOU')

# Single letters that are recoded one-for-one anywhere after the first letter.
_NYSIIS_SINGLE_RECODES = {'Q': 'G', 'Z': 'S', 'M': 'N'}


def _nysiis_recode_prefix(word, modified):
    """Return word with its leading letters recoded per the NYSIIS rules.

    :param str word: non-empty, upper-cased, alphabetic-only word
    :param bool modified: whether the USDA modified rules also apply
    :returns: the word with at most one prefix rule applied
    :rtype: str
    """
    if word[:3] == 'MAC':
        return 'MCC' + word[3:]
    if word[:2] == 'KN':
        return 'NN' + word[2:]
    if word[:1] == 'K':
        return 'C' + word[1:]
    if word[:2] in {'PH', 'PF'}:
        return 'FF' + word[2:]
    if word[:3] == 'SCH':
        return 'SSS' + word[3:]
    if modified:
        if word[:2] in {'WR', 'RH'}:
            return 'RR' + word[2:]
        if word[:2] == 'DG':
            return 'GG' + word[2:]
        if word[:1] in _NYSIIS_VOWELS:
            return 'A' + word[1:]
    return word


def _nysiis_recode_suffix(word, modified):
    """Return word with its final two letters recoded per the NYSIIS rules.

    The JR/SR rejection of the modified variant is not handled here; the
    caller checks for it before calling this helper.

    :param str word: upper-cased, alphabetic-only word (may be empty)
    :param bool modified: whether the USDA modified rules also apply
    :returns: the word with at most one suffix rule applied
    :rtype: str
    """
    tail = word[-2:]
    if tail in {'EE', 'IE'} or (modified and tail == 'YE'):
        return word[:-2] + 'Y'
    if tail in {'DT', 'RT', 'RD'}:
        return word[:-2] + 'D'
    if tail in {'NT', 'ND'}:
        return word[:-2] + ('N' if modified else 'D')
    if modified:
        if tail == 'IX':
            return word[:-2] + 'ICK'
        if tail == 'EX':
            return word[:-2] + 'ECK'
    return word


def _nysiis_recode_at(word, i, modified):
    """Apply the first matching NYSIIS body rule at position i of word.

    Rules are tried in a fixed order and only the first match is applied, so
    the order of the tests below is significant (e.g. 'EV' must be tested
    before the generic vowel rule, and 'KN' before the lone 'K' rule).

    :param str word: the partially recoded word
    :param int i: position to examine; must satisfy 1 <= i < len(word)
    :param bool modified: whether the USDA modified rules also apply
    :returns: a tuple of the (possibly rewritten) word and the number of
        following positions that the applied rule already consumed
    :rtype: tuple
    """
    head = word[:i]
    char = word[i]
    pair = word[i:i+2]
    triple = word[i:i+3]
    rest = word[i+1:]

    if pair == 'EV':
        return head + 'AF' + word[i+2:], 1
    if char in _NYSIIS_VOWELS:
        return head + 'A' + rest, 0
    # Modified variant: a non-final Y is treated as a vowel.
    if modified and char == 'Y' and i != len(word) - 1:
        return head + 'A' + rest, 0
    if char in _NYSIIS_SINGLE_RECODES:
        return head + _NYSIIS_SINGLE_RECODES[char] + rest, 0
    if pair == 'KN':
        # The only rule that shortens the word.
        return head + 'N' + word[i+2:], 0
    if char == 'K':
        return head + 'C' + rest, 0
    if triple == 'SCH':
        # Modified variant recodes a word-final SCH differently.
        if modified and i == len(word) - 3:
            return head + 'SSA', 2
        return head + 'SSS' + word[i+3:], 2
    if pair == 'SH':
        # Modified variant recodes a word-final SH differently.
        if modified and i == len(word) - 2:
            return head + 'SA', 1
        return head + 'SS' + word[i+2:], 1
    if pair == 'PH':
        return head + 'FF' + word[i+2:], 1
    if modified:
        if triple == 'GHT':
            return head + 'TTT' + word[i+3:], 2
        if pair == 'DG':
            return head + 'GG' + word[i+2:], 1
        if pair == 'WR':
            return head + 'RR' + word[i+2:], 1
    # H is replaced by the preceding letter unless it sits between two
    # vowels; a word-final H has no following vowel and so is replaced.
    if char == 'H' and (word[i-1] not in _NYSIIS_VOWELS or
                        word[i+1:i+2] not in _NYSIIS_VOWELS):
        return head + word[i-1] + rest, 0
    # W after a vowel is replaced by that vowel.
    if char == 'W' and word[i-1] in _NYSIIS_VOWELS:
        return head + word[i-1] + rest, 0
    return word, 0


def _nysiis_encode_body(word, modified):
    """Build the raw NYSIIS key by recoding word from its second letter on.

    :param str word: word after prefix and suffix recoding (may be empty)
    :param bool modified: whether the USDA modified rules also apply
    :returns: the key before repeat-removal and final trimming
    :rtype: str
    """
    key = word[:1]
    skip = 0
    for i in range(1, len(word)):
        # The range was sized from the initial word, but the 'KN' rule can
        # shorten it; no rule lengthens it, so it is safe to stop here.
        if i >= len(word):
            break
        if skip:
            # This position was already emitted by a multi-letter rule.
            skip -= 1
            continue
        word, skip = _nysiis_recode_at(word, i, modified)
        chunk = word[i:i+skip+1]
        if chunk != key[-1:]:
            key += chunk
    return key


def nysiis(word, max_length=6, modified=False):
    """Return the NYSIIS code for a word.

    The New York State Identification and Intelligence System algorithm is
    defined in :cite:`Taft:1970`.

    The modified version of this algorithm is described in Appendix B of
    :cite:`Lynch:1977`.

    Non-alphabetic characters are discarded and the word is upper-cased
    before encoding. If no alphabetic characters remain, the empty string is
    returned.

    :param str word: the word to transform
    :param int max_length: the maximum length (default 6) of the code to
        return; values from 0 to 5 are raised to 6, and a negative value
        disables truncation
    :param bool modified: indicates whether to use USDA modified NYSIIS
    :returns: the NYSIIS value; in modified mode the literal string
        ``'ERROR'`` is returned when the word (after dropping a trailing S or
        Z) ends in JR or SR
    :rtype: str

    >>> nysiis('Christopher')
    'CRASTA'
    >>> nysiis('Niall')
    'NAL'
    >>> nysiis('Smith')
    'SNAT'
    >>> nysiis('Schmidt')
    'SNAD'

    >>> nysiis('Christopher', max_length=-1)
    'CRASTAFAR'

    >>> nysiis('Christopher', max_length=8, modified=True)
    'CRASTAFA'
    >>> nysiis('Niall', max_length=8, modified=True)
    'NAL'
    >>> nysiis('Smith', max_length=8, modified=True)
    'SNAT'
    >>> nysiis('Schmidt', max_length=8, modified=True)
    'SNAD'
    """
    # Require a max_length of at least 6 (negative means "no limit")
    if max_length > -1:
        max_length = max(6, max_length)

    word = ''.join(c for c in word.upper() if c.isalpha())
    # Covers an eszett that upper() left unchanged; a no-op otherwise.
    word = word.replace('ß', 'SS')

    # exit early if there are no alphas
    if not word:
        return ''

    # Remembered so the modified variant can restore a recoded initial vowel.
    original_first_char = word[0]

    word = _nysiis_recode_prefix(word, modified)

    if modified and word[-1:] in {'S', 'Z'}:
        word = word[:-1]

    # JR/SR endings are rejected by the modified variant. The suffix tests
    # are mutually exclusive, so checking this one first changes nothing.
    if modified and word[-2:] in {'JR', 'SR'}:
        return 'ERROR'

    word = _nysiis_recode_suffix(word, modified)

    key = _delete_consecutive_repeats(_nysiis_encode_body(word, modified))

    # Final clean-up of the key's tail, applied in this order.
    if key[-1:] == 'S':
        key = key[:-1]
    if key[-2:] == 'AY':
        key = key[:-2] + 'Y'
    if key[-1:] == 'A':
        key = key[:-1]
    if modified and key[:1] == 'A':
        key = original_first_char + key[1:]

    if max_length > 0:
        key = key[:max_length]

    return key


if __name__ == '__main__':
    # Run the docstring examples in this module when executed as a script.
    from doctest import testmod
    testmod()


1			# -*- coding: utf-8 -*-
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19			"""abydos.phonetic.nysiis.
20
21			The phonetic.nysiis module implements New York State Identification and
22			Intelligence System (NYSIIS) phonetic encoding.
23			"""
24
25			from __future__ import unicode_literals
26
27			from six.moves import range
28
29			from . import _delete_consecutive_repeats
30
31			__all__ = ['nysiis']
32
33
34			def nysiis(word, max_length=6, modified=False):
35			"""Return the NYSIIS code for a word.
36
37			The New York State Identification and Intelligence System algorithm is
38			defined in :cite:`Taft:1970`.
39
40			The modified version of this algorithm is described in Appendix B of
41			:cite:`Lynch:1977`.
42
43			:param str word: the word to transform
44			:param int max_length: the maximum length (default 6) of the code to return
45			:param bool modified: indicates whether to use USDA modified NYSIIS
46			:returns: the NYSIIS value
47			:rtype: str
48
49			>>> nysiis('Christopher')
50			'CRASTA'
51			>>> nysiis('Niall')
52			'NAL'
53			>>> nysiis('Smith')
54			'SNAT'
55			>>> nysiis('Schmidt')
56			'SNAD'
57
58			>>> nysiis('Christopher', max_length=-1)
59			'CRASTAFAR'
60
61			>>> nysiis('Christopher', max_length=8, modified=True)
62			'CRASTAFA'
63			>>> nysiis('Niall', max_length=8, modified=True)
64			'NAL'
65			>>> nysiis('Smith', max_length=8, modified=True)
66			'SNAT'
67			>>> nysiis('Schmidt', max_length=8, modified=True)
68			'SNAD'
69			"""
70			# Require a max_length of at least 6
71			if max_length > -1:
72			max_length = max(6, max_length)
73
74			_vowels = {'A', 'E', 'I', 'O', 'U'}
75
76			word = ''.join(c for c in word.upper() if c.isalpha())
77			word = word.replace('ß', 'SS')
78
79			# exit early if there are no alphas
80			if not word:
81			return ''
82
83			original_first_char = word[0]
84
85			if word[:3] == 'MAC':
86			word = 'MCC'+word[3:]
87			elif word[:2] == 'KN':
88			word = 'NN'+word[2:]
89			elif word[:1] == 'K':
90			word = 'C'+word[1:]
91			elif word[:2] in {'PH', 'PF'}:
92			word = 'FF'+word[2:]
93			elif word[:3] == 'SCH':
94			word = 'SSS'+word[3:]
95			elif modified:
96			if word[:2] == 'WR':
97			word = 'RR'+word[2:]
98			elif word[:2] == 'RH':
99			word = 'RR'+word[2:]
100			elif word[:2] == 'DG':
101			word = 'GG'+word[2:]
102			elif word[:1] in _vowels:
103			word = 'A'+word[1:]
104
105			if modified and word[-1:] in {'S', 'Z'}:
106			word = word[:-1]
107
108			if word[-2:] == 'EE' or word[-2:] == 'IE' or (modified and
109			word[-2:] == 'YE'):
110			word = word[:-2]+'Y'
111			elif word[-2:] in {'DT', 'RT', 'RD'}:
112			word = word[:-2]+'D'
113			elif word[-2:] in {'NT', 'ND'}:
114			word = word[:-2]+('N' if modified else 'D')
115			elif modified:
116			if word[-2:] == 'IX':
117			word = word[:-2]+'ICK'
118			elif word[-2:] == 'EX':
119			word = word[:-2]+'ECK'
120			elif word[-2:] in {'JR', 'SR'}:
121			return 'ERROR'
122
123			key = word[:1]
124
125			skip = 0
126			for i in range(1, len(word)):
127			if i >= len(word):
128			continue
129			elif skip:
130			skip -= 1
131			continue
132			elif word[i:i+2] == 'EV':
133			word = word[:i] + 'AF' + word[i+2:]
134			skip = 1
135			elif word[i] in _vowels:
136			word = word[:i] + 'A' + word[i+1:]
137			elif modified and i != len(word)-1 and word[i] == 'Y':
138			word = word[:i] + 'A' + word[i+1:]
139			elif word[i] == 'Q':
140			word = word[:i] + 'G' + word[i+1:]
141			elif word[i] == 'Z':
142			word = word[:i] + 'S' + word[i+1:]
143			elif word[i] == 'M':
144			word = word[:i] + 'N' + word[i+1:]
145			elif word[i:i+2] == 'KN':
146			word = word[:i] + 'N' + word[i+2:]
147			elif word[i] == 'K':
148			word = word[:i] + 'C' + word[i+1:]
149			elif modified and i == len(word)-3 and word[i:i+3] == 'SCH':
150			word = word[:i] + 'SSA'
151			skip = 2
152			elif word[i:i+3] == 'SCH':
153			word = word[:i] + 'SSS' + word[i+3:]
154			skip = 2
155			elif modified and i == len(word)-2 and word[i:i+2] == 'SH':
156			word = word[:i] + 'SA'
157			skip = 1
158			elif word[i:i+2] == 'SH':
159			word = word[:i] + 'SS' + word[i+2:]
160			skip = 1
161			elif word[i:i+2] == 'PH':
162			word = word[:i] + 'FF' + word[i+2:]
163			skip = 1
164			elif modified and word[i:i+3] == 'GHT':
165			word = word[:i] + 'TTT' + word[i+3:]
166			skip = 2
167			elif modified and word[i:i+2] == 'DG':
168			word = word[:i] + 'GG' + word[i+2:]
169			skip = 1
170			elif modified and word[i:i+2] == 'WR':
171			word = word[:i] + 'RR' + word[i+2:]
172			skip = 1
173			elif word[i] == 'H' and (word[i-1] not in _vowels or
174			word[i+1:i+2] not in _vowels):
175			word = word[:i] + word[i-1] + word[i+1:]
176			elif word[i] == 'W' and word[i-1] in _vowels:
177			word = word[:i] + word[i-1] + word[i+1:]
178
179			if word[i:i+skip+1] != key[-1:]:
180			key += word[i:i+skip+1]
181
182			key = _delete_consecutive_repeats(key)
183
184			if key[-1:] == 'S':
185			key = key[:-1]
186			if key[-2:] == 'AY':
187			key = key[:-2] + 'Y'
188			if key[-1:] == 'A':
189			key = key[:-1]
190			if modified and key[:1] == 'A':
191			key = original_first_char + key[1:]
192
193			if max_length > 0:
194			key = key[:max_length]
195
196			return key
197
198
199			if __name__ == '__main__':
200			import doctest
201			doctest.testmod()
202

chrislit / abydos

Push — master ( 64abe2...a464fa )

abydos.phonetic.nysiis.nysiis() F

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like