abydos.phonetic._NYSIIS.nysiis() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.phonetic._NYSIIS.nysiis() A

↳ Parent: abydos.phonetic._NYSIIS

Complexity

Conditions

Size

Total Lines	37
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	3
dl	0
loc	37
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-


# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._NYSIIS.

New York State Identification and Intelligence System (NYSIIS) phonetic
encoding
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from six.moves import range

from ._Phonetic import Phonetic

__all__ = ['NYSIIS', 'nysiis']


class NYSIIS(Phonetic):

    """NYSIIS Code.

    The New York State Identification and Intelligence System algorithm is
    defined in :cite:`Taft:1970`.

    The modified version of this algorithm is described in Appendix B of
    :cite:`Lynch:1977`.

    :py:meth:`encode` relies on two members that are not defined in this
    class, ``self._uc_v_set`` and ``self._delete_consecutive_repeats``; they
    are expected to come from the :py:class:`Phonetic` base class.
    """

    def encode(self, word, max_length=6, modified=False):

        """Return the NYSIIS code for a word.

        The word is upper-cased and stripped of non-alphabetic characters,
        its prefix and suffix are rewritten, the remaining letters are
        transcoded left to right into a key, and the key is finally trimmed
        and (optionally) truncated.

        Args:
            word (str): The word to transform
            max_length (int): The maximum length (default 6) of the code to
                return. Values greater than -1 are raised to at least 6;
                a negative value disables truncation.
            modified (bool): Indicates whether to use USDA modified NYSIIS

        Returns:
            str: The NYSIIS value. An empty string is returned when the word
            contains no alphabetic characters. In modified mode the literal
            string ``'ERROR'`` is returned when the word ends in ``JR`` or
            ``SR`` and no earlier suffix rule applied.

        Examples:
            >>> pe = NYSIIS()
            >>> pe.encode('Christopher')
            'CRASTA'
            >>> pe.encode('Niall')
            'NAL'
            >>> pe.encode('Smith')
            'SNAT'
            >>> pe.encode('Schmidt')
            'SNAD'

            >>> pe.encode('Christopher', max_length=-1)
            'CRASTAFAR'

            >>> pe.encode('Christopher', max_length=8, modified=True)
            'CRASTAFA'
            >>> pe.encode('Niall', max_length=8, modified=True)
            'NAL'
            >>> pe.encode('Smith', max_length=8, modified=True)
            'SNAT'
            >>> pe.encode('Schmidt', max_length=8, modified=True)
            'SNAD'

        """
        # Require a max_length of at least 6
        # (negative values are left untouched and later mean "do not
        # truncate"; 0 is bumped to 6 like any other small value)
        if max_length > -1:
            max_length = max(6, max_length)

        # Upper-case the input and keep only its alphabetic characters.
        word = ''.join(c for c in word.upper() if c.isalpha())
        # Presumably for interpreters whose upper() leaves 'ß' unchanged.
        word = word.replace('ß', 'SS')

        # exit early if there are no alphas
        if not word:
            return ''

        # Remembered so that modified mode can restore it at the very end.
        original_first_char = word[0]

        # --- Prefix rules: only the first matching rule is applied. ---
        if word[:3] == 'MAC':
            word = 'MCC' + word[3:]
        elif word[:2] == 'KN':
            word = 'NN' + word[2:]
        elif word[:1] == 'K':
            word = 'C' + word[1:]
        elif word[:2] in {'PH', 'PF'}:
            word = 'FF' + word[2:]
        elif word[:3] == 'SCH':
            word = 'SSS' + word[3:]
        elif modified:
            # Extra prefixes of the modified variant; reached only when none
            # of the standard prefixes above matched.
            if word[:2] == 'WR':
                word = 'RR' + word[2:]
            elif word[:2] == 'RH':
                word = 'RR' + word[2:]
            elif word[:2] == 'DG':
                word = 'GG' + word[2:]
            elif word[:1] in self._uc_v_set:
                # _uc_v_set is presumably the set of upper-case vowels
                # (defined outside this file) -- TODO confirm
                word = 'A' + word[1:]

        # Modified variant: drop a single trailing S or Z before the suffix
        # rules are evaluated.
        if modified and word[-1:] in {'S', 'Z'}:
            word = word[:-1]

        # --- Suffix rules: only the first matching rule is applied. ---
        if (
            word[-2:] == 'EE'

            or word[-2:] == 'IE'

            or (modified and word[-2:] == 'YE')

        ):
            word = word[:-2] + 'Y'
        elif word[-2:] in {'DT', 'RT', 'RD'}:
            word = word[:-2] + 'D'
        elif word[-2:] in {'NT', 'ND'}:
            # The two variants disagree on the replacement letter here.
            word = word[:-2] + ('N' if modified else 'D')
        elif modified:
            if word[-2:] == 'IX':
                word = word[:-2] + 'ICK'
            elif word[-2:] == 'EX':
                word = word[:-2] + 'ECK'
            elif word[-2:] in {'JR', 'SR'}:
                # Sentinel string, not an exception: callers receive 'ERROR'.
                return 'ERROR'

        # The key starts with the (already rewritten) first character.
        key = word[:1]

        # Number of upcoming positions that were already consumed by a
        # multi-character replacement and must not be processed again.
        skip = 0
        # The index range is fixed from the length at this point, while
        # ``word`` itself is rewritten in place inside the loop. The order
        # of the branches matters: longer/more specific patterns are tested
        # before the shorter ones they overlap with.
        for i in range(1, len(word)):
            if i >= len(word):
                # ``word`` can get shorter inside the loop (the KN -> N
                # rewrite below), so guard against running past the end.
                continue
            elif skip:
                skip -= 1
                continue
            elif word[i : i + 2] == 'EV':
                word = word[:i] + 'AF' + word[i + 2 :]
                skip = 1
            elif word[i] in self._uc_v_set:
                word = word[:i] + 'A' + word[i + 1 :]
            elif modified and i != len(word) - 1 and word[i] == 'Y':
                # Modified variant: a non-final Y is treated like a vowel.
                word = word[:i] + 'A' + word[i + 1 :]
            elif word[i] == 'Q':
                word = word[:i] + 'G' + word[i + 1 :]
            elif word[i] == 'Z':
                word = word[:i] + 'S' + word[i + 1 :]
            elif word[i] == 'M':
                word = word[:i] + 'N' + word[i + 1 :]
            elif word[i : i + 2] == 'KN':
                # Two characters become one: this is where ``word`` shrinks.
                word = word[:i] + 'N' + word[i + 2 :]
            elif word[i] == 'K':
                word = word[:i] + 'C' + word[i + 1 :]
            elif modified and i == len(word) - 3 and word[i : i + 3] == 'SCH':
                # Modified variant: word-final SCH gets its own replacement.
                word = word[:i] + 'SSA'
                skip = 2
            elif word[i : i + 3] == 'SCH':
                word = word[:i] + 'SSS' + word[i + 3 :]
                skip = 2
            elif modified and i == len(word) - 2 and word[i : i + 2] == 'SH':
                # Modified variant: word-final SH gets its own replacement.
                word = word[:i] + 'SA'
                skip = 1
            elif word[i : i + 2] == 'SH':
                word = word[:i] + 'SS' + word[i + 2 :]
                skip = 1
            elif word[i : i + 2] == 'PH':
                word = word[:i] + 'FF' + word[i + 2 :]
                skip = 1
            elif modified and word[i : i + 3] == 'GHT':
                word = word[:i] + 'TTT' + word[i + 3 :]
                skip = 2
            elif modified and word[i : i + 2] == 'DG':
                word = word[:i] + 'GG' + word[i + 2 :]
                skip = 1
            elif modified and word[i : i + 2] == 'WR':
                word = word[:i] + 'RR' + word[i + 2 :]
                skip = 1
            elif word[i] == 'H' and (
                word[i - 1] not in self._uc_v_set

                or word[i + 1 : i + 2] not in self._uc_v_set

            ):
                # H that is not surrounded by _uc_v_set members on both
                # sides is replaced by the preceding character. At the end
                # of the word the "next character" slice is empty.
                word = word[:i] + word[i - 1] + word[i + 1 :]
            elif word[i] == 'W' and word[i - 1] in self._uc_v_set:
                # W directly after a _uc_v_set member is replaced by that
                # preceding character.
                word = word[:i] + word[i - 1] + word[i + 1 :]

            # Append the segment now stored at position i (1 + skip
            # characters) unless it equals the key's last character.
            # Segments longer than one character never equal a single
            # character, so they are always appended.
            if word[i : i + skip + 1] != key[-1:]:
                key += word[i : i + skip + 1]

        # Helper defined outside this file; presumably collapses runs of
        # identical adjacent characters -- TODO confirm against Phonetic.
        key = self._delete_consecutive_repeats(key)

        # --- Final trimming of the key, applied in this order. ---
        if key[-1:] == 'S':
            key = key[:-1]
        if key[-2:] == 'AY':
            key = key[:-2] + 'Y'
        if key[-1:] == 'A':
            key = key[:-1]
        if modified and key[:1] == 'A':
            # Modified variant: put the word's original first letter back
            # in place of a leading 'A'.
            key = original_first_char + key[1:]

        # Truncate only for positive limits; a negative max_length (the
        # only non-positive value that can reach this point) keeps the
        # full key.
        if max_length > 0:
            key = key[:max_length]

        return key


def nysiis(word, max_length=6, modified=False):
    """Return the NYSIIS code for a word.

    Convenience function: builds a fresh :py:class:`NYSIIS` object and
    delegates to :py:meth:`NYSIIS.encode` with the arguments unchanged.

    Args:
        word (str): The word to transform
        max_length (int): The maximum length (default 6) of the code to return
        modified (bool): Indicates whether to use USDA modified NYSIIS

    Returns:
        str: The NYSIIS value

    Examples:
        >>> nysiis('Christopher')
        'CRASTA'
        >>> nysiis('Niall')
        'NAL'
        >>> nysiis('Smith')
        'SNAT'
        >>> nysiis('Schmidt')
        'SNAD'

        >>> nysiis('Christopher', max_length=-1)
        'CRASTAFAR'

        >>> nysiis('Christopher', max_length=8, modified=True)
        'CRASTAFA'
        >>> nysiis('Niall', max_length=8, modified=True)
        'NAL'
        >>> nysiis('Smith', max_length=8, modified=True)
        'SNAT'
        >>> nysiis('Schmidt', max_length=8, modified=True)
        'SNAD'

    """
    encoder = NYSIIS()
    return encoder.encode(word, max_length=max_length, modified=modified)


if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
		0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_NYSIIS` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)\|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._NYSIIS.
20
21		New York State Identification and Intelligence System (NYSIIS) phonetic
22		encoding
23		"""
24
25	1	from __future__ import (
26		absolute_import,
27		division,
28		print_function,
29		unicode_literals,
30		)
31
32	1	from six.moves import range
33
34	1	from ._Phonetic import Phonetic
35
36	1	__all__ = ['NYSIIS', 'nysiis']
37
38
39	1	class NYSIIS(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
40		"""NYSIIS Code.
41
42		The New York State Identification and Intelligence System algorithm is
43		defined in :cite:`Taft:1970`.
44
45		The modified version of this algorithm is described in Appendix B of
46		:cite:`Lynch:1977`.
47		"""
48
49	1	def encode(self, word, max_length=6, modified=False):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
50		"""Return the NYSIIS code for a word.
51
52		Args:
53		word (str): The word to transform
54		max_length (int): The maximum length (default 6) of the code to
55		return
56		modified (bool): Indicates whether to use USDA modified NYSIIS
57
58		Returns:
59		str: The NYSIIS value
60
61		Examples:
62		>>> pe = NYSIIS()
63		>>> pe.encode('Christopher')
64		'CRASTA'
65		>>> pe.encode('Niall')
66		'NAL'
67		>>> pe.encode('Smith')
68		'SNAT'
69		>>> pe.encode('Schmidt')
70		'SNAD'
71
72		>>> pe.encode('Christopher', max_length=-1)
73		'CRASTAFAR'
74
75		>>> pe.encode('Christopher', max_length=8, modified=True)
76		'CRASTAFA'
77		>>> pe.encode('Niall', max_length=8, modified=True)
78		'NAL'
79		>>> pe.encode('Smith', max_length=8, modified=True)
80		'SNAT'
81		>>> pe.encode('Schmidt', max_length=8, modified=True)
82		'SNAD'
83
84		"""
85		# Require a max_length of at least 6
86	1	if max_length > -1:
87	1	max_length = max(6, max_length)
88
89	1	word = ''.join(c for c in word.upper() if c.isalpha())
90	1	word = word.replace('ß', 'SS')
91
92		# exit early if there are no alphas
93	1	if not word:
94	1	return ''
95
96	1	original_first_char = word[0]
97
98	1	if word[:3] == 'MAC':
99	1	word = 'MCC' + word[3:]
100	1	elif word[:2] == 'KN':
101	1	word = 'NN' + word[2:]
102	1	elif word[:1] == 'K':
103	1	word = 'C' + word[1:]
104	1	elif word[:2] in {'PH', 'PF'}:
105	1	word = 'FF' + word[2:]
106	1	elif word[:3] == 'SCH':
107	1	word = 'SSS' + word[3:]
108	1	elif modified:
109	1	if word[:2] == 'WR':
110	1	word = 'RR' + word[2:]
111	1	elif word[:2] == 'RH':
112	1	word = 'RR' + word[2:]
113	1	elif word[:2] == 'DG':
114	1	word = 'GG' + word[2:]
115	1	elif word[:1] in self._uc_v_set:
116	1	word = 'A' + word[1:]
117
118	1	if modified and word[-1:] in {'S', 'Z'}:
119	1	word = word[:-1]
120
121	1	if (
122		word[-2:] == 'EE'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123		or word[-2:] == 'IE'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124		or (modified and word[-2:] == 'YE')
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
125		):
126	1	word = word[:-2] + 'Y'
127	1	elif word[-2:] in {'DT', 'RT', 'RD'}:
128	1	word = word[:-2] + 'D'
129	1	elif word[-2:] in {'NT', 'ND'}:
130	1	word = word[:-2] + ('N' if modified else 'D')
131	1	elif modified:
132	1	if word[-2:] == 'IX':
133	1	word = word[:-2] + 'ICK'
134	1	elif word[-2:] == 'EX':
135	1	word = word[:-2] + 'ECK'
136	1	elif word[-2:] in {'JR', 'SR'}:
137	1	return 'ERROR'
138
139	1	key = word[:1]
140
141	1	skip = 0
142	1	for i in range(1, len(word)):
143	1	if i >= len(word):
144	1	continue
145	1	elif skip:
146	1	skip -= 1
147	1	continue
148	1	elif word[i : i + 2] == 'EV':
149	1	word = word[:i] + 'AF' + word[i + 2 :]
150	1	skip = 1
151	1	elif word[i] in self._uc_v_set:
152	1	word = word[:i] + 'A' + word[i + 1 :]
153	1	elif modified and i != len(word) - 1 and word[i] == 'Y':
154	1	word = word[:i] + 'A' + word[i + 1 :]
155	1	elif word[i] == 'Q':
156	1	word = word[:i] + 'G' + word[i + 1 :]
157	1	elif word[i] == 'Z':
158	1	word = word[:i] + 'S' + word[i + 1 :]
159	1	elif word[i] == 'M':
160	1	word = word[:i] + 'N' + word[i + 1 :]
161	1	elif word[i : i + 2] == 'KN':
162	1	word = word[:i] + 'N' + word[i + 2 :]
163	1	elif word[i] == 'K':
164	1	word = word[:i] + 'C' + word[i + 1 :]
165	1	elif modified and i == len(word) - 3 and word[i : i + 3] == 'SCH':
166	1	word = word[:i] + 'SSA'
167	1	skip = 2
168	1	elif word[i : i + 3] == 'SCH':
169	1	word = word[:i] + 'SSS' + word[i + 3 :]
170	1	skip = 2
171	1	elif modified and i == len(word) - 2 and word[i : i + 2] == 'SH':
172	1	word = word[:i] + 'SA'
173	1	skip = 1
174	1	elif word[i : i + 2] == 'SH':
175	1	word = word[:i] + 'SS' + word[i + 2 :]
176	1	skip = 1
177	1	elif word[i : i + 2] == 'PH':
178	1	word = word[:i] + 'FF' + word[i + 2 :]
179	1	skip = 1
180	1	elif modified and word[i : i + 3] == 'GHT':
181	1	word = word[:i] + 'TTT' + word[i + 3 :]
182	1	skip = 2
183	1	elif modified and word[i : i + 2] == 'DG':
184	1	word = word[:i] + 'GG' + word[i + 2 :]
185	1	skip = 1
186	1	elif modified and word[i : i + 2] == 'WR':
187	1	word = word[:i] + 'RR' + word[i + 2 :]
188	1	skip = 1
189	1	elif word[i] == 'H' and (
190		word[i - 1] not in self._uc_v_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
191		or word[i + 1 : i + 2] not in self._uc_v_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
192		):
193	1	word = word[:i] + word[i - 1] + word[i + 1 :]
194	1	elif word[i] == 'W' and word[i - 1] in self._uc_v_set:
195	1	word = word[:i] + word[i - 1] + word[i + 1 :]
196
197	1	if word[i : i + skip + 1] != key[-1:]:
198	1	key += word[i : i + skip + 1]
199
200	1	key = self._delete_consecutive_repeats(key)
201
202	1	if key[-1:] == 'S':
203	1	key = key[:-1]
204	1	if key[-2:] == 'AY':
205	1	key = key[:-2] + 'Y'
206	1	if key[-1:] == 'A':
207	1	key = key[:-1]
208	1	if modified and key[:1] == 'A':
209	1	key = original_first_char + key[1:]
210
211	1	if max_length > 0:
212	1	key = key[:max_length]
213
214	1	return key
215
216
217	1	def nysiis(word, max_length=6, modified=False):
218		"""Return the NYSIIS code for a word.
219
220		This is a wrapper for :py:meth:`NYSIIS.encode`.
221
222		Args:
223		word (str): The word to transform
224		max_length (int): The maximum length (default 6) of the code to return
225		modified (bool): Indicates whether to use USDA modified NYSIIS
226
227		Returns:
228		str: The NYSIIS value
229
230		Examples:
231		>>> nysiis('Christopher')
232		'CRASTA'
233		>>> nysiis('Niall')
234		'NAL'
235		>>> nysiis('Smith')
236		'SNAT'
237		>>> nysiis('Schmidt')
238		'SNAD'
239
240		>>> nysiis('Christopher', max_length=-1)
241		'CRASTAFAR'
242
243		>>> nysiis('Christopher', max_length=8, modified=True)
244		'CRASTAFA'
245		>>> nysiis('Niall', max_length=8, modified=True)
246		'NAL'
247		>>> nysiis('Smith', max_length=8, modified=True)
248		'SNAT'
249		>>> nysiis('Schmidt', max_length=8, modified=True)
250		'SNAD'
251
252		"""
253	1	return NYSIIS().encode(word, max_length, modified)
254
255
256		if __name__ == '__main__':
257		import doctest
258
259		doctest.testmod()
260

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._NYSIIS.nysiis() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like