abydos.phonetic._henry_early.HenryEarly.encode() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 03:25 UTC

abydos.phonetic._henry_early.HenryEarly.encode() F

↳ Parent: abydos.phonetic._henry_early

Complexity

Conditions

Size

Total Lines	175
Code Lines	111

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	96
CRAP Score	56

Importance

Changes

Metric	Value
cc	56
eloc	111
nop	3
dl	0
loc	175
ccs	96
cts	96
cp	1
crap	56
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._henry_early.

An early version of the Henry code.
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type

from ._phonetic import Phonetic

# Names exported by ``from <module> import *``: the encoder class and its
# function-style wrapper.
__all__ = ['HenryEarly', 'henry_early']


class HenryEarly(Phonetic):
    """Henry code, early version.

    The early version of Henry coding is given in :cite:`Legare:1972`. This is
    different from the later version defined in :cite:`Henry:1976`.

    The encoding is applied in stages, each implemented by a private helper:
    Rule Ib (word-initial vowel), Rule II (per-letter coding), Rule IIe
    (ending clean-up), followed by vowel removal and truncation.
    """

    # Uppercase consonants recognised by the rules below.
    _uc_c_set = set('BCDFGHJKLMNPQRSTVWXZ')
    # Word-initial vowel pairs and the single vowel each collapses to
    # (Rule Ib3).
    _diph = {
        'AI': 'E',
        'AY': 'E',
        'EI': 'E',
        'AU': 'O',
        'OI': 'O',
        'OU': 'O',
        'EU': 'U',
    }
    # Context-free one-letter substitutions (Rule IIb).
    _simple = {'W': 'V', 'X': 'S', 'Z': 'S'}

    # NOTE(review): ``_uc_set`` and ``_uc_vy_set`` are not defined in this
    # class; they are presumably inherited from ``Phonetic`` (the set of
    # uppercase letters, and of uppercase vowels plus Y) -- confirm there.

    def encode(self, word, max_length=3):
        """Calculate the early version of the Henry code for a word.

        Args:
            word (str): The word to transform
            max_length (int): The length of the code returned (defaults to 3);
                pass -1 to return the code without truncation

        Returns:
            str: The early Henry code (the empty string if no character of
                the word survives filtering)

        Examples:
            >>> pe = HenryEarly()
            >>> pe.encode('Marchand')
            'MRC'
            >>> pe.encode('Beaulieu')
            'BL'
            >>> pe.encode('Beaumont')
            'BM'
            >>> pe.encode('Legrand')
            'LGR'
            >>> pe.encode('Pelletier')
            'PLT'

        """
        # Uppercase, decompose accented characters (NFKD), then keep only the
        # characters found in ``_uc_set``, which discards the combining marks
        # produced by the decomposition along with anything else unknown.
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = ''.join(c for c in word if c in self._uc_set)

        if not word:
            return ''

        # Rule Ia seems to be covered entirely in II
        word = self._rule_ib(word)
        code = self._rule_iie(self._rule_ii(word))

        # Drop non-initial vowels (the keys are the code points of A, E, I,
        # O, U and Y).
        code = code[:1] + code[1:].translate(
            {65: '', 69: '', 73: '', 79: '', 85: '', 89: ''}
        )

        if max_length != -1:
            code = code[:max_length]

        return code

    def _rule_ib(self, word):
        """Apply Rule Ib: normalise a word-initial vowel (or Y).

        Args:
            word (str): The non-empty, filtered, uppercase word

        Returns:
            str: The word with its initial vowel or diphthong rewritten, or
                the word unchanged when the rule does not apply

        """
        if word[0] not in self._uc_vy_set:
            return word

        cons = self._uc_c_set
        # One-character slices yield '' past the end of the word, which is a
        # member of none of the sets tested below.
        second = word[1:2]
        third = word[2:3]

        # Ib1
        if (second in cons - {'M', 'N'} and third in cons) or (
            second in cons and third not in cons
        ):
            if word[0] == 'Y':
                word = 'I' + word[1:]
        # Ib2
        elif second in {'M', 'N'} and third in cons:
            if word[0] == 'E':
                word = 'A' + word[1:]
            elif word[0] in {'I', 'U', 'Y'}:
                word = 'E' + word[1:]
        # Ib3
        elif word[:2] in self._diph:
            word = self._diph[word[:2]] + word[2:]
        # Ib4
        elif second in self._uc_vy_set and word[0] == 'Y':
            word = 'I' + word[1:]

        return word

    def _rule_ii(self, word):
        """Apply Rule II: code the word letter by letter.

        The order of the tests below is significant; each letter is handled
        by the first branch that matches.

        Args:
            word (str): The word as returned by Rule Ib

        Returns:
            str: The intermediate code, still containing vowels

        """
        cons = self._uc_c_set
        vowels = self._uc_vy_set
        # Loop-invariant, so built once per call rather than per letter.
        cons_not_lr = cons - {'L', 'R'}

        pieces = []
        skip = 0

        for pos, char in enumerate(word):
            # Letters already consumed by an earlier multi-letter match.
            if skip:
                skip -= 1
                continue

            # '' at the respective word boundary.
            nxch = word[pos + 1 : pos + 2]
            prev = word[pos - 1 : pos]

            if char in vowels:
                pieces.append(char)
            # IIc
            elif char == nxch:
                skip = 1
                pieces.append(char)
            elif word[pos : pos + 2] in {'CQ', 'DT', 'SC'}:
                continue
            # IIb
            elif char in self._simple:
                pieces.append(self._simple[char])
            elif char in {'C', 'G', 'P', 'Q', 'S'}:
                # ``skip`` is necessarily 0 here, so taking the helper's
                # value cannot lose a pending skip.
                emitted, skip = self._encode_cgpqs(word, pos)
                pieces.append(emitted)
            # IId
            elif char == 'H' and prev in cons:
                continue
            elif char in cons_not_lr and nxch in cons_not_lr:
                continue
            elif char == 'L' and nxch in {'M', 'N'}:
                continue
            elif char in {'M', 'N'} and prev in vowels and nxch in cons:
                continue
            # IIa
            else:
                pieces.append(char)

        return ''.join(pieces)

    def _encode_cgpqs(self, word, pos):
        """Code one of the context-sensitive letters C, G, P, Q or S.

        Args:
            word (str): The word being encoded
            pos (int): The index in word of the C, G, P, Q or S to code

        Returns:
            tuple: ``(emitted, skip)`` where ``emitted`` is the text to add
                to the code (possibly empty) and ``skip`` is the number of
                following letters that have been consumed

        """
        char = word[pos]
        nxch = word[pos + 1 : pos + 2]

        if char == 'C':
            if nxch in {'A', 'O', 'U', 'L', 'R'}:
                return 'K', 0
            if nxch in {'E', 'I', 'Y'}:
                return 'S', 0
            if nxch == 'H':
                if word[pos + 2 : pos + 3] in self._uc_vy_set:
                    return 'C', 0
                return 'K', 0  # CHR, CHL, etc.
            return 'C', 0

        if char == 'G':
            if nxch in {'A', 'O', 'U', 'L', 'R'}:
                return 'G', 0
            if nxch in {'E', 'I', 'Y'}:
                return 'J', 0
            if nxch == 'N':
                return 'N', 0
            # In any other context G contributes nothing to the code.
            return '', 0

        if char == 'P':
            return ('F' if nxch == 'H' else 'P'), 0

        if char == 'Q':
            if word[pos + 1 : pos + 3] in {'UE', 'UI', 'UY'}:
                return 'G', 0
            return 'K', 0  # QUA, QUO, etc.

        # S...: longest patterns are tested first so that SAINTE is not
        # mistaken for SAINT, nor STE for ST.
        if word[pos : pos + 6] == 'SAINTE':
            return 'X', 5
        if word[pos : pos + 5] == 'SAINT':
            return 'X', 4
        if word[pos : pos + 3] == 'STE':
            return 'X', 2
        if word[pos : pos + 2] == 'ST':
            return 'X', 1
        if nxch in self._uc_c_set:
            return '', 0
        return 'S', 0

    def _rule_iie(self, code):
        """Apply Rule IIe: trim the ending of the intermediate code.

        At most one trimming rule is applied: the first that matches.

        Args:
            code (str): The intermediate code produced by Rule II

        Returns:
            str: The code with its ending trimmed, or unchanged

        """
        # IIe1
        if code[-4:] in {'AULT', 'EULT', 'OULT'}:
            return code[:-2]
        # Per the original author's note, the vowel + MPS and vowel +
        # MB/MP/ND/NS/NT endings need no handling here because the rules
        # applied earlier already block them.
        if code[-2:-1] == 'R' and code[-1:] in self._uc_c_set:
            return code[:-1]
        # IIe2
        if code[-2:-1] in self._uc_vy_set and code[-1:] in {
            'D',
            'M',
            'N',
            'S',
            'T',
        }:
            return code[:-1]
        if code[-2:] == 'ER':
            return code[:-1]
        return code


def henry_early(word, max_length=3):
    """Return the early Henry code of a word.

    Convenience function that builds a :py:class:`HenryEarly` encoder and
    delegates to :py:meth:`HenryEarly.encode`.

    Args:
        word (str): The word to transform
        max_length (int): The length of the code returned (defaults to 3)

    Returns:
        str: The early Henry code

    Examples:
        >>> henry_early('Marchand')
        'MRC'
        >>> henry_early('Beaulieu')
        'BL'
        >>> henry_early('Beaumont')
        'BM'
        >>> henry_early('Legrand')
        'LGR'
        >>> henry_early('Pelletier')
        'PLT'

    """
    encoder = HenryEarly()
    return encoder.encode(word, max_length)


if __name__ == '__main__':
    # When executed as a script, run the docstring examples of this module.
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._henry_early.
20
21		an early version of Henry Code
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34
35	1	from ._phonetic import Phonetic
36
37	1	__all__ = ['HenryEarly', 'henry_early']
38
39
40	1	class HenryEarly(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""Henry code, early version.
42
43		The early version of Henry coding is given in :cite:`Legare:1972`. This is
44		different from the later version defined in :cite:`Henry:1976`.
45		"""
46
47	1	_uc_c_set = set('BCDFGHJKLMNPQRSTVWXZ')
48	1	_diph = {
49		'AI': 'E',
50		'AY': 'E',
51		'EI': 'E',
52		'AU': 'O',
53		'OI': 'O',
54		'OU': 'O',
55		'EU': 'U',
56		}
57	1	_simple = {'W': 'V', 'X': 'S', 'Z': 'S'}
58
59	1	def encode(self, word, max_length=3):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
60		"""Calculate the early version of the Henry code for a word.
61
62		Args:
63		word (str): The word to transform
64		max_length (int): The length of the code returned (defaults to 3)
65
66		Returns:
67		str: The early Henry code
68
69		Examples:
70		>>> henry_early('Marchand')
71		'MRC'
72		>>> henry_early('Beaulieu')
73		'BL'
74		>>> henry_early('Beaumont')
75		'BM'
76		>>> henry_early('Legrand')
77		'LGR'
78		>>> henry_early('Pelletier')
79		'PLT'
80
81		"""
82	1	word = unicode_normalize('NFKD', text_type(word.upper()))
83	1	word = ''.join(c for c in word if c in self._uc_set)
84
85	1	if not word:
86	1	return ''
87
88		# Rule Ia seems to be covered entirely in II
89
90		# Rule Ib
91	1	if word[0] in self._uc_vy_set:
92		# Ib1
93	1	if (
94		word[1:2] in self._uc_c_set - {'M', 'N'}
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
95		and word[2:3] in self._uc_c_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
96		) or (
97		word[1:2] in self._uc_c_set and word[2:3] not in self._uc_c_set
98		):
99	1	if word[0] == 'Y':
100	1	word = 'I' + word[1:]
101		# Ib2
102	1	elif word[1:2] in {'M', 'N'} and word[2:3] in self._uc_c_set:
103	1	if word[0] == 'E':
104	1	word = 'A' + word[1:]
105	1	elif word[0] in {'I', 'U', 'Y'}:
106	1	word = 'E' + word[1:]
107		# Ib3
108	1	elif word[:2] in self._diph:
109	1	word = self._diph[word[:2]] + word[2:]
110		# Ib4
111	1	elif word[1:2] in self._uc_vy_set and word[0] == 'Y':
112	1	word = 'I' + word[1:]
113
114	1	code = ''
115	1	skip = 0
116
117		# Rule II
118	1	for pos, char in enumerate(word):
119	1	nxch = word[pos + 1 : pos + 2]
120	1	prev = word[pos - 1 : pos]
121
122	1	if skip:
123	1	skip -= 1
124	1	elif char in self._uc_vy_set:
125	1	code += char
126		# IIc
127	1	elif char == nxch:
128	1	skip = 1
129	1	code += char
130	1	elif word[pos : pos + 2] in {'CQ', 'DT', 'SC'}:
131	1	continue
132		# IIb
133	1	elif char in self._simple:
134	1	code += self._simple[char]
135	1	elif char in {'C', 'G', 'P', 'Q', 'S'}:
136	1	if char == 'C':
137	1	if nxch in {'A', 'O', 'U', 'L', 'R'}:
138	1	code += 'K'
139	1	elif nxch in {'E', 'I', 'Y'}:
140	1	code += 'S'
141	1	elif nxch == 'H':
142	1	if word[pos + 2 : pos + 3] in self._uc_vy_set:
143	1	code += 'C'
144		else: # CHR, CHL, etc.
145	1	code += 'K'
146		else:
147	1	code += 'C'
148	1	elif char == 'G':
149	1	if nxch in {'A', 'O', 'U', 'L', 'R'}:
150	1	code += 'G'
151	1	elif nxch in {'E', 'I', 'Y'}:
152	1	code += 'J'
153	1	elif nxch == 'N':
154	1	code += 'N'
155	1	elif char == 'P':
156	1	if nxch != 'H':
157	1	code += 'P'
158		else:
159	1	code += 'F'
160	1	elif char == 'Q':
161	1	if word[pos + 1 : pos + 3] in {'UE', 'UI', 'UY'}:
162	1	code += 'G'
163		else: # QUA, QUO, etc.
164	1	code += 'K'
165		else: # S...
166	1	if word[pos : pos + 6] == 'SAINTE':
167	1	code += 'X'
168	1	skip = 5
169	1	elif word[pos : pos + 5] == 'SAINT':
170	1	code += 'X'
171	1	skip = 4
172	1	elif word[pos : pos + 3] == 'STE':
173	1	code += 'X'
174	1	skip = 2
175	1	elif word[pos : pos + 2] == 'ST':
176	1	code += 'X'
177	1	skip = 1
178	1	elif nxch in self._uc_c_set:
179	1	continue
180		else:
181	1	code += 'S'
182		# IId
183	1	elif char == 'H' and prev in self._uc_c_set:
184	1	continue
185	1	elif char in self._uc_c_set - {
186		'L',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
187		'R',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
188		} and nxch in self._uc_c_set - {'L', 'R'}:
189	1	continue
190	1	elif char == 'L' and nxch in {'M', 'N'}:
191	1	continue
192	1	elif (
193		char in {'M', 'N'}
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
194		and prev in self._uc_vy_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
195		and nxch in self._uc_c_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
196		):
197	1	continue
198		# IIa
199		else:
200	1	code += char
201
202		# IIe1
203	1	if code[-4:] in {'AULT', 'EULT', 'OULT'}:
204	1	code = code[:-2]
205		# The following are blocked by rules above
206		# elif code[-4:-3] in _vows and code[-3:] == 'MPS':
207		# code = code[:-3]
208		# elif code[-3:-2] in _vows and code[-2:] in {'MB', 'MP', 'ND',
209		# 'NS', 'NT'}:
210		# code = code[:-2]
211	1	elif code[-2:-1] == 'R' and code[-1:] in self._uc_c_set:
212	1	code = code[:-1]
213		# IIe2
214	1	elif code[-2:-1] in self._uc_vy_set and code[-1:] in {
215		'D',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
216		'M',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
217		'N',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
218		'S',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
219		'T',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
220		}:
221	1	code = code[:-1]
222	1	elif code[-2:] == 'ER':
223	1	code = code[:-1]
224
225		# Drop non-initial vowels
226	1	code = code[:1] + code[1:].translate(
227		{65: '', 69: '', 73: '', 79: '', 85: '', 89: ''}
228		)
229
230	1	if max_length != -1:
231	1	code = code[:max_length]
232
233	1	return code
234
235
236	1	def henry_early(word, max_length=3):
237		"""Calculate the early version of the Henry code for a word.
238
239		This is a wrapper for :py:meth:`HenryEarly.encode`.
240
241		Args:
242		word (str): The word to transform
243		max_length (int): The length of the code returned (defaults to 3)
244
245		Returns:
246		str: The early Henry code
247
248		Examples:
249		>>> henry_early('Marchand')
250		'MRC'
251		>>> henry_early('Beaulieu')
252		'BL'
253		>>> henry_early('Beaumont')
254		'BM'
255		>>> henry_early('Legrand')
256		'LGR'
257		>>> henry_early('Pelletier')
258		'PLT'
259
260		"""
261	1	return HenryEarly().encode(word, max_length)
262
263
264		if __name__ == '__main__':
265		import doctest
266
267		doctest.testmod()
268

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._henry_early.HenryEarly.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like