abydos.phonetic._alpha_sis.AlphaSIS.encode() - Code Metrics - Inspection of "started new entry in HISTORY for 0.4.0" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Push — master ( 23810f...afe14d )

by Chris

created 2019-06-01 00:50 UTC

abydos.phonetic._alpha_sis.AlphaSIS.encode() F

↳ Parent: abydos.phonetic._alpha_sis

Complexity

Conditions

Size

Total Lines	88
Code Lines	41

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	40
CRAP Score	14

Importance

Changes

Metric	Value
cc	14
eloc	41
nop	3
dl	0
loc	88
ccs	40
cts	40
cp	1
crap	14
rs	3.6
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._alpha_sis.

IBM's Alpha Search Inquiry System coding
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type
from six.moves import range

from ._phonetic import _Phonetic

__all__ = ['AlphaSIS', 'alpha_sis']


class AlphaSIS(_Phonetic):
    """Alpha-SIS.

    The Alpha Search Inquiry System code is defined in :cite:`IBM:1973`.
    This implementation is based on the description in :cite:`Moore:1977`.
    """

    # Codes emitted for a recognized word-initial substring. They replace the
    # default leading '0' that is used when no initial substring matches.
    _alpha_sis_initials = {
        'GF': '08',
        'GM': '03',
        'GN': '02',
        'KN': '02',
        'PF': '08',
        'PN': '02',
        'PS': '00',
        'WR': '04',
        'A': '1',
        'E': '1',
        'H': '2',
        'I': '1',
        'J': '3',
        'O': '1',
        'U': '1',
        'W': '4',
        'Y': '5',
    }
    # Matching order for the initial substrings: digraphs are tried before
    # single letters so that the longest applicable prefix wins.
    _alpha_sis_initials_order = (
        'GF',
        'GM',
        'GN',
        'KN',
        'PF',
        'PN',
        'PS',
        'WR',
        'A',
        'E',
        'H',
        'I',
        'J',
        'O',
        'U',
        'W',
        'Y',
    )
    # Codes for substrings after the initial position. A tuple value lists
    # alternative codings; each alternative forks every code built so far.
    _alpha_sis_basic = {
        'SCH': '6',
        'CZ': ('70', '6', '0'),
        'CH': ('6', '70', '0'),
        'CK': ('7', '6'),
        'DS': ('0', '10'),
        'DZ': ('0', '10'),
        'TS': ('0', '10'),
        'TZ': ('0', '10'),
        'CI': '0',
        'CY': '0',
        'CE': '0',
        'SH': '6',
        'DG': '7',
        'PH': '8',
        'C': ('7', '6'),
        'K': ('7', '6'),
        'Z': '0',
        'S': '0',
        'D': '1',
        'T': '1',
        'N': '2',
        'M': '3',
        'R': '4',
        'L': '5',
        'J': '6',
        'G': '7',
        'Q': '7',
        'X': '7',
        'F': '8',
        'V': '8',
        'B': '9',
        'P': '9',
    }
    # Matching order for the basic substrings, longest first. Each key is
    # listed once: only the first occurrence of a key can ever match.
    _alpha_sis_basic_order = (
        'SCH',
        'CZ',
        'CH',
        'CK',
        'DS',
        'DZ',
        'TS',
        'TZ',
        'CI',
        'CY',
        'CE',
        'SH',
        'DG',
        'PH',
        'C',
        'K',
        'Z',
        'S',
        'D',
        'T',
        'N',
        'M',
        'R',
        'L',
        'J',
        'G',
        'Q',
        'X',
        'F',
        'V',
        'B',
        'P',
    )

    def _initial_code(self, word):
        """Return the code for the start of a word and the length consumed.

        Parameters
        ----------
        word : str
            The normalized (uppercase, letters-only) word

        Returns
        -------
        tuple
            A pair of the leading code string and the number of characters
            of the word that it accounts for; ``('0', 0)`` if no special
            initial substring matches

        """
        for prefix in self._alpha_sis_initials_order:
            if word.startswith(prefix):
                return self._alpha_sis_initials[prefix], len(prefix)
        return '0', 0

    def _expand_codes(self, word, pos, alpha):
        """Encode the word from a position onward, extending partial codes.

        Parameters
        ----------
        word : str
            The normalized (uppercase, letters-only) word
        pos : int
            The index of the first character still to be encoded
        alpha : list
            The partial codes built so far

        Returns
        -------
        list
            The untrimmed codes, with ``'_'`` standing in for each character
            that has no entry in the basic table

        """
        while pos < len(word):
            for key in self._alpha_sis_basic_order:
                if word.startswith(key, pos):
                    codes = self._alpha_sis_basic[key]
                    if not isinstance(codes, tuple):
                        codes = (codes,)
                    # Alternatives form the outer loop so that all codes
                    # continuing with the first alternative stay in front.
                    alpha = [
                        partial + code for code in codes for partial in alpha
                    ]
                    pos += len(key)
                    break
            else:
                # Nothing matched here: record a placeholder so that equal
                # codes on either side are not later treated as a doublet.
                alpha = [partial + '_' for partial in alpha]
                pos += 1
        return alpha

    @staticmethod
    def _collapse_doublets(code):
        """Return a code with adjacent repeated symbols trimmed.

        Parameters
        ----------
        code : str
            An untrimmed code, possibly containing ``'_'`` placeholders

        Returns
        -------
        str
            The code with the second symbol of each adjacent pair removed

        """
        pos = 1
        while pos < len(code):
            if code[pos] == code[pos - 1]:
                code = code[:pos] + code[pos + 1 :]
            # NOTE: the index advances even after a removal, so a run of
            # three identical symbols is reduced to two rather than one.
            # This is kept as-is so that existing codes do not change.
            pos += 1
        return code

    def encode(self, word, max_length=14):
        """Return the IBM Alpha Search Inquiry System code for a word.

        A collection is necessary as the return type since there can be
        multiple values for a single word. But the collection must be ordered
        since the first value is the primary coding.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The length of the code returned (defaults to 14); values are
            clamped to the range [4, 64], and -1 selects the maximum of 64

        Returns
        -------
        tuple
            The Alpha-SIS value

        Examples
        --------
        >>> pe = AlphaSIS()
        >>> pe.encode('Christopher')
        ('06401840000000', '07040184000000', '04018400000000')
        >>> pe.encode('Niall')
        ('02500000000000',)
        >>> pe.encode('Smith')
        ('03100000000000',)
        >>> pe.encode('Schmidt')
        ('06310000000000',)

        """
        # Uppercase, decompose, and keep only the characters in _uc_set
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')
        word = ''.join(c for c in word if c in self._uc_set)

        # Clamp max_length to [4, 64]
        if max_length != -1:
            max_length = min(max(4, max_length), 64)
        else:
            max_length = 64

        # Do special processing for initial substrings, then encode the
        # remainder of the word in the main encoding loop
        initial, pos = self._initial_code(word)
        alpha = self._expand_codes(word, pos, [initial])

        # Trim doublets and placeholders, then pad with zeros and cut every
        # code to exactly max_length characters
        padding = '0' * max_length
        return tuple(
            (self._collapse_doublets(code).replace('_', '') + padding)[
                :max_length
            ]
            for code in alpha
        )


def alpha_sis(word, max_length=14):
    """Return the IBM Alpha Search Inquiry System code for a word.

    This is a convenience wrapper that builds an :py:class:`AlphaSIS`
    instance and delegates to :py:meth:`AlphaSIS.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The length of the code returned (defaults to 14)

    Returns
    -------
    tuple
        The Alpha-SIS value

    Examples
    --------
    >>> alpha_sis('Christopher')
    ('06401840000000', '07040184000000', '04018400000000')
    >>> alpha_sis('Niall')
    ('02500000000000',)
    >>> alpha_sis('Smith')
    ('03100000000000',)
    >>> alpha_sis('Schmidt')
    ('06310000000000',)

    """
    encoder = AlphaSIS()
    return encoder.encode(word, max_length)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._alpha_sis.
20
21		IBM's Alpha Search Inquiry System coding
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34	1	from six.moves import range
35
36	1	from ._phonetic import _Phonetic
37
38	1	__all__ = ['AlphaSIS', 'alpha_sis']
39
40
41	1	class AlphaSIS(_Phonetic):
42		"""Alpha-SIS.
43
44		The Alpha Search Inquiry System code is defined in :cite:`IBM:1973`.
45		This implementation is based on the description in :cite:`Moore:1977`.
46		"""
47
48	1	_alpha_sis_initials = {
49		'GF': '08',
50		'GM': '03',
51		'GN': '02',
52		'KN': '02',
53		'PF': '08',
54		'PN': '02',
55		'PS': '00',
56		'WR': '04',
57		'A': '1',
58		'E': '1',
59		'H': '2',
60		'I': '1',
61		'J': '3',
62		'O': '1',
63		'U': '1',
64		'W': '4',
65		'Y': '5',
66		}
67	1	_alpha_sis_initials_order = (
68		'GF',
69		'GM',
70		'GN',
71		'KN',
72		'PF',
73		'PN',
74		'PS',
75		'WR',
76		'A',
77		'E',
78		'H',
79		'I',
80		'J',
81		'O',
82		'U',
83		'W',
84		'Y',
85		)
86	1	_alpha_sis_basic = {
87		'SCH': '6',
88		'CZ': ('70', '6', '0'),
89		'CH': ('6', '70', '0'),
90		'CK': ('7', '6'),
91		'DS': ('0', '10'),
92		'DZ': ('0', '10'),
93		'TS': ('0', '10'),
94		'TZ': ('0', '10'),
95		'CI': '0',
96		'CY': '0',
97		'CE': '0',
98		'SH': '6',
99		'DG': '7',
100		'PH': '8',
101		'C': ('7', '6'),
102		'K': ('7', '6'),
103		'Z': '0',
104		'S': '0',
105		'D': '1',
106		'T': '1',
107		'N': '2',
108		'M': '3',
109		'R': '4',
110		'L': '5',
111		'J': '6',
112		'G': '7',
113		'Q': '7',
114		'X': '7',
115		'F': '8',
116		'V': '8',
117		'B': '9',
118		'P': '9',
119		}
120	1	_alpha_sis_basic_order = (
121		'SCH',
122		'CZ',
123		'CH',
124		'CK',
125		'DS',
126		'DZ',
127		'TS',
128		'TZ',
129		'CI',
130		'CY',
131		'CE',
132		'SH',
133		'DG',
134		'PH',
135		'C',
136		'K',
137		'Z',
138		'S',
139		'D',
140		'T',
141		'N',
142		'M',
143		'R',
144		'L',
145		'J',
146		'C',
147		'G',
148		'K',
149		'Q',
150		'X',
151		'F',
152		'V',
153		'B',
154		'P',
155		)
156
157	1	def encode(self, word, max_length=14):
158		"""Return the IBM Alpha Search Inquiry System code for a word.
159
160		A collection is necessary as the return type since there can be
161		multiple values for a single word. But the collection must be ordered
162		since the first value is the primary coding.
163
164		Parameters
165		----------
166		word : str
167		The word to transform
168		max_length : int
169		The length of the code returned (defaults to 14)
170
171		Returns
172		-------
173		tuple
174		The Alpha-SIS value
175
176		Examples
177		--------
178		>>> pe = AlphaSIS()
179		>>> pe.encode('Christopher')
180		('06401840000000', '07040184000000', '04018400000000')
181		>>> pe.encode('Niall')
182		('02500000000000',)
183		>>> pe.encode('Smith')
184		('03100000000000',)
185		>>> pe.encode('Schmidt')
186		('06310000000000',)
187
188		"""
189	1	alpha = ['']
190	1	pos = 0
191	1	word = unicode_normalize('NFKD', text_type(word.upper()))
192	1	word = word.replace('ß', 'SS')
193	1	word = ''.join(c for c in word if c in self._uc_set)
194
195		# Clamp max_length to [4, 64]
196	1	if max_length != -1:
197	1	max_length = min(max(4, max_length), 64)
198		else:
199	1	max_length = 64
200
201		# Do special processing for initial substrings
202	1	for k in self._alpha_sis_initials_order:
203	1	if word.startswith(k):
204	1	alpha[0] += self._alpha_sis_initials[k]
205	1	pos += len(k)
206	1	break
207
208		# Add a '0' if alpha is still empty
209	1	if not alpha[0]:
210	1	alpha[0] += '0'
211
212		# Whether or not any special initial codes were encoded, iterate
213		# through the length of the word in the main encoding loop
214	1	while pos < len(word):
215	1	orig_pos = pos
216	1	for k in self._alpha_sis_basic_order:
217	1	if word[pos:].startswith(k):
218	1	if isinstance(self._alpha_sis_basic[k], tuple):
219	1	newalpha = []
220	1	for i in range(len(self._alpha_sis_basic[k])):
221	1	newalpha += [
222		_ + self._alpha_sis_basic[k][i] for _ in alpha
223		]
224	1	alpha = newalpha
225		else:
226	1	alpha = [_ + self._alpha_sis_basic[k] for _ in alpha]
227	1	pos += len(k)
228	1	break
229	1	if pos == orig_pos:
230	1	alpha = [_ + '_' for _ in alpha]
231	1	pos += 1
232
233		# Trim doublets and placeholders
234	1	for i in range(len(alpha)):
235	1	pos = 1
236	1	while pos < len(alpha[i]):
237	1	if alpha[i][pos] == alpha[i][pos - 1]:
238	1	alpha[i] = alpha[i][:pos] + alpha[i][pos + 1 :]
239	1	pos += 1
240	1	alpha = (_.replace('_', '') for _ in alpha)
		Issue [Comprehensibility · Best Practice], introduced 2018-08-02 19:04 UTC (0 ignored issues): The variable `_` does not seem to be defined.
241
242		# Trim codes and return tuple
243	1	alpha = ((_ + ('0' * max_length))[:max_length] for _ in alpha)
244	1	return tuple(alpha)
245
246
247	1	def alpha_sis(word, max_length=14):
248		"""Return the IBM Alpha Search Inquiry System code for a word.
249
250		This is a wrapper for :py:meth:`AlphaSIS.encode`.
251
252		Parameters
253		----------
254		word : str
255		The word to transform
256		max_length : int
257		The length of the code returned (defaults to 14)
258
259		Returns
260		-------
261		tuple
262		The Alpha-SIS value
263
264		Examples
265		--------
266		>>> alpha_sis('Christopher')
267		('06401840000000', '07040184000000', '04018400000000')
268		>>> alpha_sis('Niall')
269		('02500000000000',)
270		>>> alpha_sis('Smith')
271		('03100000000000',)
272		>>> alpha_sis('Schmidt')
273		('06310000000000',)
274
275		"""
276	1	return AlphaSIS().encode(word, max_length)
277
278
279		if __name__ == '__main__':
280		import doctest
281
282		doctest.testmod()
283

chrislit / abydos

Push — master ( 23810f...afe14d )

abydos.phonetic._alpha_sis.AlphaSIS.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like