abydos.phonetic._nysiis - Code Metrics - Inspection of "78a222a9f7d8976f6744d263e3d6d01a2a991c27" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Branch — master (78a222)

by Chris

created 2018-10-26 11:30 UTC

abydos.phonetic._nysiis F

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	207
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
wmc	66
eloc	122
dl	0
loc	207
ccs	115
cts	115
cp	1
rs	3.12
c	0
b	0
f	0

1 Function

Rating	Name	Duplication	Size	Complexity
F	nysiis()	0	167	66

How to fix Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._nysiis.

The phonetic._nysiis module implements New York State Identification and
Intelligence System (NYSIIS) phonetic encoding.
"""

from __future__ import unicode_literals

from six.moves import range

from ._util import _delete_consecutive_repeats

__all__ = ['nysiis']


# Vowels recognised by NYSIIS; shared by all encoding stages below.
_NYSIIS_VOWELS = frozenset('AEIOU')


def _nysiis_recode_prefix(word, modified):
    """Return word with its leading letter group recoded.

    Only the first matching rule is applied; the rules guarded by
    ``modified`` are consulted only when none of the standard ones match.
    """
    if word[:3] == 'MAC':
        return 'MCC' + word[3:]
    if word[:2] == 'KN':
        return 'NN' + word[2:]
    if word[:1] == 'K':
        return 'C' + word[1:]
    if word[:2] in {'PH', 'PF'}:
        return 'FF' + word[2:]
    if word[:3] == 'SCH':
        return 'SSS' + word[3:]
    if modified:
        if word[:2] in {'WR', 'RH'}:
            return 'RR' + word[2:]
        if word[:2] == 'DG':
            return 'GG' + word[2:]
        if word[:1] in _NYSIIS_VOWELS:
            return 'A' + word[1:]
    return word


def _nysiis_recode_suffix(word, modified):
    """Return word with its trailing letter group recoded.

    Returns None when, in modified mode, the word ends in 'JR' or 'SR';
    the caller translates that into the 'ERROR' result.
    """
    # Modified NYSIIS drops one trailing S/Z before looking at the suffix.
    if modified and word[-1:] in {'S', 'Z'}:
        word = word[:-1]

    tail = word[-2:]
    if tail in {'EE', 'IE'} or (modified and tail == 'YE'):
        return word[:-2] + 'Y'
    if tail in {'DT', 'RT', 'RD'}:
        return word[:-2] + 'D'
    if tail in {'NT', 'ND'}:
        return word[:-2] + ('N' if modified else 'D')
    if modified:
        if tail == 'IX':
            return word[:-2] + 'ICK'
        if tail == 'EX':
            return word[:-2] + 'ECK'
        if tail in {'JR', 'SR'}:
            return None
    return word


def _nysiis_transcode(word, modified):
    """Recode every position after the first and return the raw key.

    The word is rewritten in place (as a new string) while it is scanned,
    so the order of the rules below matters: the first matching rule wins.
    The returned key has not yet had consecutive repeats removed.
    """
    key = word[:1]

    # Number of upcoming positions already consumed by a multi-letter rule.
    skip = 0
    for i in range(1, len(word)):
        if i >= len(word):
            # A previous rule (e.g. 'KN' -> 'N') shortened the word.
            continue
        elif skip:
            skip -= 1
            continue
        elif word[i : i + 2] == 'EV':
            word = word[:i] + 'AF' + word[i + 2 :]
            skip = 1
        elif word[i] in _NYSIIS_VOWELS:
            word = word[:i] + 'A' + word[i + 1 :]
        elif modified and i != len(word) - 1 and word[i] == 'Y':
            word = word[:i] + 'A' + word[i + 1 :]
        elif word[i] == 'Q':
            word = word[:i] + 'G' + word[i + 1 :]
        elif word[i] == 'Z':
            word = word[:i] + 'S' + word[i + 1 :]
        elif word[i] == 'M':
            word = word[:i] + 'N' + word[i + 1 :]
        elif word[i : i + 2] == 'KN':
            word = word[:i] + 'N' + word[i + 2 :]
        elif word[i] == 'K':
            word = word[:i] + 'C' + word[i + 1 :]
        elif modified and i == len(word) - 3 and word[i : i + 3] == 'SCH':
            # Word-final 'SCH' in modified mode.
            word = word[:i] + 'SSA'
            skip = 2
        elif word[i : i + 3] == 'SCH':
            word = word[:i] + 'SSS' + word[i + 3 :]
            skip = 2
        elif modified and i == len(word) - 2 and word[i : i + 2] == 'SH':
            # Word-final 'SH' in modified mode.
            word = word[:i] + 'SA'
            skip = 1
        elif word[i : i + 2] == 'SH':
            word = word[:i] + 'SS' + word[i + 2 :]
            skip = 1
        elif word[i : i + 2] == 'PH':
            word = word[:i] + 'FF' + word[i + 2 :]
            skip = 1
        elif modified and word[i : i + 3] == 'GHT':
            word = word[:i] + 'TTT' + word[i + 3 :]
            skip = 2
        elif modified and word[i : i + 2] == 'DG':
            word = word[:i] + 'GG' + word[i + 2 :]
            skip = 1
        elif modified and word[i : i + 2] == 'WR':
            word = word[:i] + 'RR' + word[i + 2 :]
            skip = 1
        elif word[i] == 'H' and (
            word[i - 1] not in _NYSIIS_VOWELS
            or word[i + 1 : i + 2] not in _NYSIIS_VOWELS
        ):
            # 'H' not flanked by vowels on both sides copies its predecessor.
            word = word[:i] + word[i - 1] + word[i + 1 :]
        elif word[i] == 'W' and word[i - 1] in _NYSIIS_VOWELS:
            # 'W' after a vowel copies that vowel.
            word = word[:i] + word[i - 1] + word[i + 1 :]

        # Append the (possibly multi-letter) recoded chunk unless it merely
        # repeats the last character already in the key.
        if word[i : i + skip + 1] != key[-1:]:
            key += word[i : i + skip + 1]

    return key


def nysiis(word, max_length=6, modified=False):
    """Return the NYSIIS code for a word.

    The New York State Identification and Intelligence System algorithm is
    defined in :cite:`Taft:1970`.

    The modified version of this algorithm is described in Appendix B of
    :cite:`Lynch:1977`.

    Non-alphabetic characters are discarded and the word is upper-cased
    before encoding. An input with no alphabetic characters yields ``''``.
    In modified mode, a word ending in ``JR`` or ``SR`` yields the literal
    string ``'ERROR'``.

    :param str word: the word to transform
    :param int max_length: the maximum length (default 6) of the code to
        return; values from 0 to 5 are raised to 6, and a negative value
        disables truncation
    :param bool modified: indicates whether to use USDA modified NYSIIS
    :returns: the NYSIIS value
    :rtype: str

    >>> nysiis('Christopher')
    'CRASTA'
    >>> nysiis('Niall')
    'NAL'
    >>> nysiis('Smith')
    'SNAT'
    >>> nysiis('Schmidt')
    'SNAD'

    >>> nysiis('Christopher', max_length=-1)
    'CRASTAFAR'

    >>> nysiis('Christopher', max_length=8, modified=True)
    'CRASTAFA'
    >>> nysiis('Niall', max_length=8, modified=True)
    'NAL'
    >>> nysiis('Smith', max_length=8, modified=True)
    'SNAT'
    >>> nysiis('Schmidt', max_length=8, modified=True)
    'SNAD'
    """
    # Require a max_length of at least 6
    if max_length > -1:
        max_length = max(6, max_length)

    word = ''.join(c for c in word.upper() if c.isalpha())
    # Expand any ß that upper() left untouched.
    word = word.replace('ß', 'SS')

    # exit early if there are no alphas
    if not word:
        return ''

    # Remembered so modified mode can restore a leading vowel at the end.
    original_first_char = word[0]

    word = _nysiis_recode_prefix(word, modified)
    word = _nysiis_recode_suffix(word, modified)
    if word is None:
        return 'ERROR'

    key = _delete_consecutive_repeats(_nysiis_transcode(word, modified))

    # Final clean-up of the key's tail; the three checks apply in sequence.
    if key[-1:] == 'S':
        key = key[:-1]
    if key[-2:] == 'AY':
        key = key[:-2] + 'Y'
    if key[-1:] == 'A':
        key = key[:-1]
    if modified and key[:1] == 'A':
        key = original_first_char + key[1:]

    if max_length > 0:
        key = key[:max_length]

    return key


if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._nysiis.
20
21		The phonetic._nysiis module implements New York State Identification and
22		Intelligence System (NYSIIS) phonetic encoding.
23		"""
24
25	1	from __future__ import unicode_literals
26
27	1	from six.moves import range
28
29	1	from ._util import _delete_consecutive_repeats
30
31	1	__all__ = ['nysiis']
32
33
34	1	def nysiis(word, max_length=6, modified=False):
35		"""Return the NYSIIS code for a word.
36
37		The New York State Identification and Intelligence System algorithm is
38		defined in :cite:`Taft:1970`.
39
40		The modified version of this algorithm is described in Appendix B of
41		:cite:`Lynch:1977`.
42
43		:param str word: the word to transform
44		:param int max_length: the maximum length (default 6) of the code to return
45		:param bool modified: indicates whether to use USDA modified NYSIIS
46		:returns: the NYSIIS value
47		:rtype: str
48
49		>>> nysiis('Christopher')
50		'CRASTA'
51		>>> nysiis('Niall')
52		'NAL'
53		>>> nysiis('Smith')
54		'SNAT'
55		>>> nysiis('Schmidt')
56		'SNAD'
57
58		>>> nysiis('Christopher', max_length=-1)
59		'CRASTAFAR'
60
61		>>> nysiis('Christopher', max_length=8, modified=True)
62		'CRASTAFA'
63		>>> nysiis('Niall', max_length=8, modified=True)
64		'NAL'
65		>>> nysiis('Smith', max_length=8, modified=True)
66		'SNAT'
67		>>> nysiis('Schmidt', max_length=8, modified=True)
68		'SNAD'
69		"""
70		# Require a max_length of at least 6
71	1	if max_length > -1:
72	1	max_length = max(6, max_length)
73
74	1	_vowels = {'A', 'E', 'I', 'O', 'U'}
75
76	1	word = ''.join(c for c in word.upper() if c.isalpha())
77	1	word = word.replace('ß', 'SS')
78
79		# exit early if there are no alphas
80	1	if not word:
81	1	return ''
82
83	1	original_first_char = word[0]
84
85	1	if word[:3] == 'MAC':
86	1	word = 'MCC' + word[3:]
87	1	elif word[:2] == 'KN':
88	1	word = 'NN' + word[2:]
89	1	elif word[:1] == 'K':
90	1	word = 'C' + word[1:]
91	1	elif word[:2] in {'PH', 'PF'}:
92	1	word = 'FF' + word[2:]
93	1	elif word[:3] == 'SCH':
94	1	word = 'SSS' + word[3:]
95	1	elif modified:
96	1	if word[:2] == 'WR':
97	1	word = 'RR' + word[2:]
98	1	elif word[:2] == 'RH':
99	1	word = 'RR' + word[2:]
100	1	elif word[:2] == 'DG':
101	1	word = 'GG' + word[2:]
102	1	elif word[:1] in _vowels:
103	1	word = 'A' + word[1:]
104
105	1	if modified and word[-1:] in {'S', 'Z'}:
106	1	word = word[:-1]
107
108	1	if (
109		word[-2:] == 'EE'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
110		or word[-2:] == 'IE'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
111		or (modified and word[-2:] == 'YE')
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
112		):
113	1	word = word[:-2] + 'Y'
114	1	elif word[-2:] in {'DT', 'RT', 'RD'}:
115	1	word = word[:-2] + 'D'
116	1	elif word[-2:] in {'NT', 'ND'}:
117	1	word = word[:-2] + ('N' if modified else 'D')
118	1	elif modified:
119	1	if word[-2:] == 'IX':
120	1	word = word[:-2] + 'ICK'
121	1	elif word[-2:] == 'EX':
122	1	word = word[:-2] + 'ECK'
123	1	elif word[-2:] in {'JR', 'SR'}:
124	1	return 'ERROR'
125
126	1	key = word[:1]
127
128	1	skip = 0
129	1	for i in range(1, len(word)):
130	1	if i >= len(word):
131	1	continue
132	1	elif skip:
133	1	skip -= 1
134	1	continue
135	1	elif word[i : i + 2] == 'EV':
136	1	word = word[:i] + 'AF' + word[i + 2 :]
137	1	skip = 1
138	1	elif word[i] in _vowels:
139	1	word = word[:i] + 'A' + word[i + 1 :]
140	1	elif modified and i != len(word) - 1 and word[i] == 'Y':
141	1	word = word[:i] + 'A' + word[i + 1 :]
142	1	elif word[i] == 'Q':
143	1	word = word[:i] + 'G' + word[i + 1 :]
144	1	elif word[i] == 'Z':
145	1	word = word[:i] + 'S' + word[i + 1 :]
146	1	elif word[i] == 'M':
147	1	word = word[:i] + 'N' + word[i + 1 :]
148	1	elif word[i : i + 2] == 'KN':
149	1	word = word[:i] + 'N' + word[i + 2 :]
150	1	elif word[i] == 'K':
151	1	word = word[:i] + 'C' + word[i + 1 :]
152	1	elif modified and i == len(word) - 3 and word[i : i + 3] == 'SCH':
153	1	word = word[:i] + 'SSA'
154	1	skip = 2
155	1	elif word[i : i + 3] == 'SCH':
156	1	word = word[:i] + 'SSS' + word[i + 3 :]
157	1	skip = 2
158	1	elif modified and i == len(word) - 2 and word[i : i + 2] == 'SH':
159	1	word = word[:i] + 'SA'
160	1	skip = 1
161	1	elif word[i : i + 2] == 'SH':
162	1	word = word[:i] + 'SS' + word[i + 2 :]
163	1	skip = 1
164	1	elif word[i : i + 2] == 'PH':
165	1	word = word[:i] + 'FF' + word[i + 2 :]
166	1	skip = 1
167	1	elif modified and word[i : i + 3] == 'GHT':
168	1	word = word[:i] + 'TTT' + word[i + 3 :]
169	1	skip = 2
170	1	elif modified and word[i : i + 2] == 'DG':
171	1	word = word[:i] + 'GG' + word[i + 2 :]
172	1	skip = 1
173	1	elif modified and word[i : i + 2] == 'WR':
174	1	word = word[:i] + 'RR' + word[i + 2 :]
175	1	skip = 1
176	1	elif word[i] == 'H' and (
177		word[i - 1] not in _vowels or word[i + 1 : i + 2] not in _vowels
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
178		):
179	1	word = word[:i] + word[i - 1] + word[i + 1 :]
180	1	elif word[i] == 'W' and word[i - 1] in _vowels:
181	1	word = word[:i] + word[i - 1] + word[i + 1 :]
182
183	1	if word[i : i + skip + 1] != key[-1:]:
184	1	key += word[i : i + skip + 1]
185
186	1	key = _delete_consecutive_repeats(key)
187
188	1	if key[-1:] == 'S':
189	1	key = key[:-1]
190	1	if key[-2:] == 'AY':
191	1	key = key[:-2] + 'Y'
192	1	if key[-1:] == 'A':
193	1	key = key[:-1]
194	1	if modified and key[:1] == 'A':
195	1	key = original_first_char + key[1:]
196
197	1	if max_length > 0:
198	1	key = key[:max_length]
199
200	1	return key
201
202
203		if __name__ == '__main__':
204		import doctest
205
206		doctest.testmod()
207

chrislit / abydos

Branch — master (78a222)

abydos.phonetic._nysiis F

Complexity

Size/Duplication

Test Coverage

Importance

1 Function

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like