abydos.phonetic._nysiis.NYSIIS.__init__()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 20
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 20
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 2
nop 3
crap 2
# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._nysiis.

New York State Identification and Intelligence System (NYSIIS) phonetic
encoding
"""

from ._phonetic import _Phonetic

# Public API of this module.
__all__ = ['NYSIIS']


class NYSIIS(_Phonetic):
    """NYSIIS Code.

    The New York State Identification and Intelligence System algorithm is
    defined in :cite:`Taft:1970`.

    The modified version of this algorithm is described in Appendix B of
    :cite:`Lynch:1977`.

    .. versionadded:: 0.3.6
    """

    def __init__(self, max_length: int = 6, modified: bool = False) -> None:
        """Initialize NYSIIS instance.

        Parameters
        ----------
        max_length : int
            The maximum length (default 6) of the code to return. Values
            from 0 through 5 are raised to 6; a negative value disables
            truncation entirely.
        modified : bool
            Indicates whether to use USDA modified NYSIIS


        .. versionadded:: 0.4.0

        """
        self._max_length = max_length
        # Require a max_length of at least 6. Negative values are left
        # untouched; encode() only truncates when the limit is positive.
        if self._max_length > -1:
            self._max_length = max(6, self._max_length)

        self._modified = modified

    def encode(self, word: str) -> str:
        """Return the NYSIIS code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The NYSIIS value. An empty string is returned when ``word``
            contains no alphabetic characters. In modified mode, the
            literal string ``'ERROR'`` is returned for words ending in
            ``JR`` or ``SR`` (when no earlier suffix rule applies).

        Examples
        --------
        >>> pe = NYSIIS()
        >>> pe.encode('Christopher')
        'CRASTA'
        >>> pe.encode('Niall')
        'NAL'
        >>> pe.encode('Smith')
        'SNAT'
        >>> pe.encode('Schmidt')
        'SNAD'

        >>> NYSIIS(max_length=-1).encode('Christopher')
        'CRASTAFAR'

        >>> pe_8m = NYSIIS(max_length=8, modified=True)
        >>> pe_8m.encode('Christopher')
        'CRASTAFA'
        >>> pe_8m.encode('Niall')
        'NAL'
        >>> pe_8m.encode('Smith')
        'SNAT'
        >>> pe_8m.encode('Schmidt')
        'SNAD'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Normalize: uppercase and keep alphabetic characters only.
        word = ''.join(c for c in word.upper() if c.isalpha())

        # exit early if there are no alphas
        if not word:
            return ''

        # Remembered so that modified mode can restore the original initial
        # letter if the finished key starts with 'A' (see end of method).
        original_first_char = word[0]

        # --- Prefix rules: only the first matching rule is applied. ---
        if word[:3] == 'MAC':
            word = 'MCC' + word[3:]
        elif word[:2] == 'KN':
            word = 'NN' + word[2:]
        elif word[:1] == 'K':
            word = 'C' + word[1:]
        elif word[:2] in {'PH', 'PF'}:
            word = 'FF' + word[2:]
        elif word[:3] == 'SCH':
            word = 'SSS' + word[3:]
        elif self._modified:
            if word[:2] == 'WR':
                word = 'RR' + word[2:]
            elif word[:2] == 'RH':
                word = 'RR' + word[2:]
            elif word[:2] == 'DG':
                word = 'GG' + word[2:]
            # _uc_v_set is inherited from _Phonetic (presumably the set of
            # uppercase vowels -- confirm against the base class).
            elif word[:1] in self._uc_v_set:
                word = 'A' + word[1:]

        # --- Suffix rules. ---
        # Modified mode first drops a single trailing S or Z.
        if self._modified and word[-1:] in {'S', 'Z'}:
            word = word[:-1]

        if (
            word[-2:] == 'EE'
            or word[-2:] == 'IE'
            or (self._modified and word[-2:] == 'YE')
        ):
            word = word[:-2] + 'Y'
        elif word[-2:] in {'DT', 'RT', 'RD'}:
            word = word[:-2] + 'D'
        elif word[-2:] in {'NT', 'ND'}:
            word = word[:-2] + ('N' if self._modified else 'D')
        elif self._modified:
            if word[-2:] == 'IX':
                word = word[:-2] + 'ICK'
            elif word[-2:] == 'EX':
                word = word[:-2] + 'ECK'
            elif word[-2:] in {'JR', 'SR'}:
                # Sentinel value returned as-is for JR/SR endings.
                return 'ERROR'

        # --- Main scan: the key starts with the first letter, then each
        # following position is rewritten in place and appended. ---
        key = word[:1]

        # Number of upcoming positions already consumed by a multi-letter
        # replacement made at an earlier index.
        skip = 0
        for i in range(1, len(word)):
            if i >= len(word):
                # The 'KN' -> 'N' rule shortens the word and no rule in this
                # loop lengthens it, so once i runs past the end every
                # remaining index is out of range as well.
                break
            elif skip:
                skip -= 1
                continue
            elif word[i : i + 2] == 'EV':
                word = word[:i] + 'AF' + word[i + 2 :]
                skip = 1
            elif word[i] in self._uc_v_set:
                word = word[:i] + 'A' + word[i + 1 :]
            elif self._modified and i != len(word) - 1 and word[i] == 'Y':
                word = word[:i] + 'A' + word[i + 1 :]
            elif word[i] == 'Q':
                word = word[:i] + 'G' + word[i + 1 :]
            elif word[i] == 'Z':
                word = word[:i] + 'S' + word[i + 1 :]
            elif word[i] == 'M':
                word = word[:i] + 'N' + word[i + 1 :]
            elif word[i : i + 2] == 'KN':
                word = word[:i] + 'N' + word[i + 2 :]
            elif word[i] == 'K':
                word = word[:i] + 'C' + word[i + 1 :]
            elif (
                self._modified
                and i == len(word) - 3
                and word[i : i + 3] == 'SCH'
            ):
                # Modified mode: word-final SCH becomes SSA.
                word = word[:i] + 'SSA'
                skip = 2
            elif word[i : i + 3] == 'SCH':
                word = word[:i] + 'SSS' + word[i + 3 :]
                skip = 2
            elif (
                self._modified
                and i == len(word) - 2
                and word[i : i + 2] == 'SH'
            ):
                # Modified mode: word-final SH becomes SA.
                word = word[:i] + 'SA'
                skip = 1
            elif word[i : i + 2] == 'SH':
                word = word[:i] + 'SS' + word[i + 2 :]
                skip = 1
            elif word[i : i + 2] == 'PH':
                word = word[:i] + 'FF' + word[i + 2 :]
                skip = 1
            elif self._modified and word[i : i + 3] == 'GHT':
                word = word[:i] + 'TTT' + word[i + 3 :]
                skip = 2
            elif self._modified and word[i : i + 2] == 'DG':
                word = word[:i] + 'GG' + word[i + 2 :]
                skip = 1
            elif self._modified and word[i : i + 2] == 'WR':
                word = word[:i] + 'RR' + word[i + 2 :]
                skip = 1
            elif word[i] == 'H' and (
                word[i - 1] not in self._uc_v_set
                or word[i + 1 : i + 2] not in self._uc_v_set
            ):
                # H that is not both preceded and followed by a member of
                # _uc_v_set is replaced by the preceding character. At the
                # end of the word the one-character slice is '' and so
                # counts as "not in the set".
                word = word[:i] + word[i - 1] + word[i + 1 :]
            elif word[i] == 'W' and word[i - 1] in self._uc_v_set:
                word = word[:i] + word[i - 1] + word[i + 1 :]

            # Append the chunk just produced (1 + skip characters) unless
            # it merely repeats the last character already in the key.
            if word[i : i + skip + 1] != key[-1:]:
                key += word[i : i + skip + 1]

        # Helper inherited from _Phonetic.
        key = self._delete_consecutive_repeats(key)

        # --- Key clean-up, applied in this order. ---
        if key[-1:] == 'S':
            key = key[:-1]
        if key[-2:] == 'AY':
            key = key[:-2] + 'Y'
        if key[-1:] == 'A':
            key = key[:-1]
        if self._modified and key[:1] == 'A':
            # Modified mode restores the word's original first letter.
            key = original_first_char + key[1:]

        # Truncate only for a positive limit; a negative max_length means
        # the full key is returned.
        if self._max_length > 0:
            key = key[: self._max_length]

        return key


if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()