abydos.phonetic._henry_early.henry_early() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.phonetic._henry_early.henry_early() A

↳ Parent: abydos.phonetic._henry_early

Complexity

Conditions

Size

Total Lines	32
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	2
dl	0
loc	32
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._henry_early.

an early version of Henry Code
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type

from ._phonetic import _Phonetic

__all__ = ['HenryEarly', 'henry_early']


class HenryEarly(_Phonetic):

    """Henry code, early version.

    The early version of Henry coding is given in :cite:`Legare:1972`. This is
    different from the later version defined in :cite:`Henry:1976`.

    :py:meth:`encode` only reads the class-level lookup tables below plus
    ``_uc_set`` and ``_uc_vy_set``; the latter two are not defined in this
    class (presumably inherited from ``_Phonetic`` -- confirm there). It does
    not write to the instance.
    """

    # Uppercase consonants. 'Y' is deliberately absent: the code below tests
    # it through ``_uc_vy_set`` instead.
    _uc_c_set = set('BCDFGHJKLMNPQRSTVWXZ')
    # Word-initial two-letter vowel groups and the single letter that
    # replaces them (applied by rule Ib3 in ``encode``).
    _diph = {
        'AI': 'E',
        'AY': 'E',
        'EI': 'E',
        'AU': 'O',
        'OI': 'O',
        'OU': 'O',
        'EU': 'U',
    }
    # Unconditional one-to-one consonant substitutions (rule IIb).
    _simple = {'W': 'V', 'X': 'S', 'Z': 'S'}

    def encode(self, word, max_length=3):

        """Calculate the early version of the Henry code for a word.

        The word is uppercased, NFKD-normalized and filtered down to the
        characters in ``_uc_set``. The leading vowel (if any) is then
        rewritten (rule Ib), the word is scanned left to right to build the
        code (rule II), a word-final cleanup is applied (rule IIe), all
        non-initial vowels are removed, and the result is truncated.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The length of the code returned (defaults to 3). The value -1
            disables truncation; any other value is used directly as the
            slice bound ``code[:max_length]``.

        Returns
        -------
        str
            The early Henry code, or the empty string if no character of
            the word survives the initial filtering

        Examples
        --------
        >>> henry_early('Marchand')
        'MRC'
        >>> henry_early('Beaulieu')
        'BL'
        >>> henry_early('Beaumont')
        'BM'
        >>> henry_early('Legrand')
        'LGR'
        >>> henry_early('Pelletier')
        'PLT'

        """
        # Uppercase first, then decompose (NFKD) so that the filter on the
        # next line can drop whatever is not in _uc_set (presumably A-Z, so
        # combining accents are discarded -- confirm in _Phonetic).
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = ''.join(c for c in word if c in self._uc_set)

        if not word:
            return ''

        # Rule Ia seems to be covered entirely in II

        # Rule Ib
        # Only words whose first character is in _uc_vy_set are rewritten
        # here. All look-ahead uses slices (word[1:2], word[2:3]); past the
        # end of the word they yield '', which is never a set member, so
        # short words need no special-casing.
        if word[0] in self._uc_vy_set:
            # Ib1
            # Second letter is a consonant other than M/N followed by a
            # consonant, or any consonant followed by a non-consonant
            # (including end of word): only an initial Y changes, to I.
            if (
                word[1:2] in self._uc_c_set - {'M', 'N'}

                and word[2:3] in self._uc_c_set

            ) or (
                word[1:2] in self._uc_c_set and word[2:3] not in self._uc_c_set
            ):
                if word[0] == 'Y':
                    word = 'I' + word[1:]
            # Ib2
            # M or N followed by a consonant: initial E becomes A, initial
            # I/U/Y becomes E.
            elif word[1:2] in {'M', 'N'} and word[2:3] in self._uc_c_set:
                if word[0] == 'E':
                    word = 'A' + word[1:]
                elif word[0] in {'I', 'U', 'Y'}:
                    word = 'E' + word[1:]
            # Ib3
            # Initial two-letter group listed in _diph collapses to its
            # single-letter replacement.
            elif word[:2] in self._diph:
                word = self._diph[word[:2]] + word[2:]
            # Ib4
            # Initial Y followed by another _uc_vy_set character becomes I.
            elif word[1:2] in self._uc_vy_set and word[0] == 'Y':
                word = 'I' + word[1:]

        code = ''
        # Number of upcoming characters to ignore because an earlier branch
        # already consumed them (doubled letters, SAINT/ST prefixes).
        skip = 0

        # Rule II
        # The branches below form a single ordered chain: the first one that
        # matches wins, so their order is part of the algorithm.
        for pos, char in enumerate(word):
            # One-character look-ahead / look-behind; both are '' at the
            # respective end of the word (word[-1:0] is empty for pos == 0).
            nxch = word[pos + 1 : pos + 2]
            prev = word[pos - 1 : pos]

            if skip:
                skip -= 1
            # Vowels (and Y) are kept for now so the word-final rules below
            # can see them; non-initial ones are stripped after the loop.
            elif char in self._uc_vy_set:
                code += char
            # IIc
            # Doubled consonant: emit it once, unmodified, and skip the
            # second occurrence.
            elif char == nxch:
                skip = 1
                code += char
            # The first letter of these pairs is dropped; the second letter
            # is handled on its own iteration.
            elif word[pos : pos + 2] in {'CQ', 'DT', 'SC'}:
                continue
            # IIb
            elif char in self._simple:
                code += self._simple[char]
            elif char in {'C', 'G', 'P', 'Q', 'S'}:
                if char == 'C':
                    if nxch in {'A', 'O', 'U', 'L', 'R'}:
                        code += 'K'
                    elif nxch in {'E', 'I', 'Y'}:
                        code += 'S'
                    elif nxch == 'H':
                        if word[pos + 2 : pos + 3] in self._uc_vy_set:
                            code += 'C'
                        else:  # CHR, CHL, etc.
                            code += 'K'
                    else:
                        code += 'C'
                elif char == 'G':
                    # NOTE: there is no final else here, so a G followed by
                    # anything not listed below (including end of word)
                    # contributes nothing to the code.
                    if nxch in {'A', 'O', 'U', 'L', 'R'}:
                        code += 'G'
                    elif nxch in {'E', 'I', 'Y'}:
                        code += 'J'
                    elif nxch == 'N':
                        code += 'N'
                elif char == 'P':
                    # PH is coded F; the H itself is dropped later by IId.
                    if nxch != 'H':
                        code += 'P'
                    else:
                        code += 'F'
                elif char == 'Q':
                    if word[pos + 1 : pos + 3] in {'UE', 'UI', 'UY'}:
                        code += 'G'
                    else:  # QUA, QUO, etc.
                        code += 'K'
                else:  # S...
                    # SAINTE / SAINT / STE / ST are coded as a single X; skip
                    # is set to the number of remaining letters of the match.
                    # Longest patterns are tested first.
                    if word[pos : pos + 6] == 'SAINTE':
                        code += 'X'
                        skip = 5
                    elif word[pos : pos + 5] == 'SAINT':
                        code += 'X'
                        skip = 4
                    elif word[pos : pos + 3] == 'STE':
                        code += 'X'
                        skip = 2
                    elif word[pos : pos + 2] == 'ST':
                        code += 'X'
                        skip = 1
                    # S before any other consonant is dropped.
                    elif nxch in self._uc_c_set:
                        continue
                    else:
                        code += 'S'
            # IId
            # H directly after a consonant is dropped.
            elif char == 'H' and prev in self._uc_c_set:
                continue
            # In a pair of consonants where neither is L or R, the first one
            # is dropped.
            elif char in self._uc_c_set - {
                'L',

                'R',

            } and nxch in self._uc_c_set - {'L', 'R'}:
                continue
            # L before M or N is dropped.
            elif char == 'L' and nxch in {'M', 'N'}:
                continue
            # M or N between a _uc_vy_set character and a consonant is
            # dropped.
            elif (
                char in {'M', 'N'}

                and prev in self._uc_vy_set

                and nxch in self._uc_c_set

            ):
                continue
            # IIa
            # Anything not handled above is copied through unchanged.
            else:
                code += char

        # Word-final cleanup: at most one of the following rules applies.
        # IIe1
        # Final -AULT / -EULT / -OULT loses its 'LT'.
        if code[-4:] in {'AULT', 'EULT', 'OULT'}:
            code = code[:-2]
        # The following are blocked by rules above
        # elif code[-4:-3] in _vows and code[-3:] == 'MPS':
        #    code = code[:-3]
        # elif code[-3:-2] in _vows and code[-2:] in {'MB', 'MP', 'ND',
        #                                             'NS', 'NT'}:
        #    code = code[:-2]
        # Final consonant directly after R is dropped.
        elif code[-2:-1] == 'R' and code[-1:] in self._uc_c_set:
            code = code[:-1]
        # IIe2
        # Final D, M, N, S or T directly after a _uc_vy_set character is
        # dropped.
        elif code[-2:-1] in self._uc_vy_set and code[-1:] in {
            'D',

            'M',

            'N',

            'S',

            'T',

        }:
            code = code[:-1]
        # Final -ER loses its R.
        elif code[-2:] == 'ER':
            code = code[:-1]

        # Drop non-initial vowels
        # The translate table is keyed by code point: 65='A', 69='E',
        # 73='I', 79='O', 85='U', 89='Y'. The first character is exempt.
        code = code[:1] + code[1:].translate(
            {65: '', 69: '', 73: '', 79: '', 85: '', 89: ''}
        )

        # -1 is the "no truncation" sentinel.
        if max_length != -1:
            code = code[:max_length]

        return code


def henry_early(word, max_length=3):
    """Return the early Henry code of a word.

    Functional shortcut: a fresh :py:class:`HenryEarly` encoder is created
    and the call is delegated to :py:meth:`HenryEarly.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The length of the code returned (defaults to 3); passed through
        unchanged to :py:meth:`HenryEarly.encode`

    Returns
    -------
    str
        The early Henry code

    Examples
    --------
    >>> henry_early('Marchand')
    'MRC'
    >>> henry_early('Beaulieu')
    'BL'
    >>> henry_early('Beaumont')
    'BM'
    >>> henry_early('Legrand')
    'LGR'
    >>> henry_early('Pelletier')
    'PLT'

    """
    encoder = HenryEarly()
    return encoder.encode(word, max_length)


if __name__ == '__main__':
    # Executed as a script: run the examples embedded in this module's
    # docstrings as doctests.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._henry_early.
20
21		an early version of Henry Code
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34
35	1	from ._phonetic import _Phonetic
36
37	1	__all__ = ['HenryEarly', 'henry_early']
38
39
40	1	class HenryEarly(_Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""Henry code, early version.
42
43		The early version of Henry coding is given in :cite:`Legare:1972`. This is
44		different from the later version defined in :cite:`Henry:1976`.
45		"""
46
47	1	_uc_c_set = set('BCDFGHJKLMNPQRSTVWXZ')
48	1	_diph = {
49		'AI': 'E',
50		'AY': 'E',
51		'EI': 'E',
52		'AU': 'O',
53		'OI': 'O',
54		'OU': 'O',
55		'EU': 'U',
56		}
57	1	_simple = {'W': 'V', 'X': 'S', 'Z': 'S'}
58
59	1	def encode(self, word, max_length=3):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
60		"""Calculate the early version of the Henry code for a word.
61
62		Parameters
63		----------
64		word : str
65		The word to transform
66		max_length : int
67		The length of the code returned (defaults to 3)
68
69		Returns
70		-------
71		str
72		The early Henry code
73
74		Examples
75		--------
76		>>> henry_early('Marchand')
77		'MRC'
78		>>> henry_early('Beaulieu')
79		'BL'
80		>>> henry_early('Beaumont')
81		'BM'
82		>>> henry_early('Legrand')
83		'LGR'
84		>>> henry_early('Pelletier')
85		'PLT'
86
87		"""
88	1	word = unicode_normalize('NFKD', text_type(word.upper()))
89	1	word = ''.join(c for c in word if c in self._uc_set)
90
91	1	if not word:
92	1	return ''
93
94		# Rule Ia seems to be covered entirely in II
95
96		# Rule Ib
97	1	if word[0] in self._uc_vy_set:
98		# Ib1
99	1	if (
100		word[1:2] in self._uc_c_set - {'M', 'N'}
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
101		and word[2:3] in self._uc_c_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
102		) or (
103		word[1:2] in self._uc_c_set and word[2:3] not in self._uc_c_set
104		):
105	1	if word[0] == 'Y':
106	1	word = 'I' + word[1:]
107		# Ib2
108	1	elif word[1:2] in {'M', 'N'} and word[2:3] in self._uc_c_set:
109	1	if word[0] == 'E':
110	1	word = 'A' + word[1:]
111	1	elif word[0] in {'I', 'U', 'Y'}:
112	1	word = 'E' + word[1:]
113		# Ib3
114	1	elif word[:2] in self._diph:
115	1	word = self._diph[word[:2]] + word[2:]
116		# Ib4
117	1	elif word[1:2] in self._uc_vy_set and word[0] == 'Y':
118	1	word = 'I' + word[1:]
119
120	1	code = ''
121	1	skip = 0
122
123		# Rule II
124	1	for pos, char in enumerate(word):
125	1	nxch = word[pos + 1 : pos + 2]
126	1	prev = word[pos - 1 : pos]
127
128	1	if skip:
129	1	skip -= 1
130	1	elif char in self._uc_vy_set:
131	1	code += char
132		# IIc
133	1	elif char == nxch:
134	1	skip = 1
135	1	code += char
136	1	elif word[pos : pos + 2] in {'CQ', 'DT', 'SC'}:
137	1	continue
138		# IIb
139	1	elif char in self._simple:
140	1	code += self._simple[char]
141	1	elif char in {'C', 'G', 'P', 'Q', 'S'}:
142	1	if char == 'C':
143	1	if nxch in {'A', 'O', 'U', 'L', 'R'}:
144	1	code += 'K'
145	1	elif nxch in {'E', 'I', 'Y'}:
146	1	code += 'S'
147	1	elif nxch == 'H':
148	1	if word[pos + 2 : pos + 3] in self._uc_vy_set:
149	1	code += 'C'
150		else: # CHR, CHL, etc.
151	1	code += 'K'
152		else:
153	1	code += 'C'
154	1	elif char == 'G':
155	1	if nxch in {'A', 'O', 'U', 'L', 'R'}:
156	1	code += 'G'
157	1	elif nxch in {'E', 'I', 'Y'}:
158	1	code += 'J'
159	1	elif nxch == 'N':
160	1	code += 'N'
161	1	elif char == 'P':
162	1	if nxch != 'H':
163	1	code += 'P'
164		else:
165	1	code += 'F'
166	1	elif char == 'Q':
167	1	if word[pos + 1 : pos + 3] in {'UE', 'UI', 'UY'}:
168	1	code += 'G'
169		else: # QUA, QUO, etc.
170	1	code += 'K'
171		else: # S...
172	1	if word[pos : pos + 6] == 'SAINTE':
173	1	code += 'X'
174	1	skip = 5
175	1	elif word[pos : pos + 5] == 'SAINT':
176	1	code += 'X'
177	1	skip = 4
178	1	elif word[pos : pos + 3] == 'STE':
179	1	code += 'X'
180	1	skip = 2
181	1	elif word[pos : pos + 2] == 'ST':
182	1	code += 'X'
183	1	skip = 1
184	1	elif nxch in self._uc_c_set:
185	1	continue
186		else:
187	1	code += 'S'
188		# IId
189	1	elif char == 'H' and prev in self._uc_c_set:
190	1	continue
191	1	elif char in self._uc_c_set - {
192		'L',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
193		'R',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
194		} and nxch in self._uc_c_set - {'L', 'R'}:
195	1	continue
196	1	elif char == 'L' and nxch in {'M', 'N'}:
197	1	continue
198	1	elif (
199		char in {'M', 'N'}
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
200		and prev in self._uc_vy_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
201		and nxch in self._uc_c_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
202		):
203	1	continue
204		# IIa
205		else:
206	1	code += char
207
208		# IIe1
209	1	if code[-4:] in {'AULT', 'EULT', 'OULT'}:
210	1	code = code[:-2]
211		# The following are blocked by rules above
212		# elif code[-4:-3] in _vows and code[-3:] == 'MPS':
213		# code = code[:-3]
214		# elif code[-3:-2] in _vows and code[-2:] in {'MB', 'MP', 'ND',
215		# 'NS', 'NT'}:
216		# code = code[:-2]
217	1	elif code[-2:-1] == 'R' and code[-1:] in self._uc_c_set:
218	1	code = code[:-1]
219		# IIe2
220	1	elif code[-2:-1] in self._uc_vy_set and code[-1:] in {
221		'D',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
222		'M',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
223		'N',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
224		'S',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
225		'T',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
226		}:
227	1	code = code[:-1]
228	1	elif code[-2:] == 'ER':
229	1	code = code[:-1]
230
231		# Drop non-initial vowels
232	1	code = code[:1] + code[1:].translate(
233		{65: '', 69: '', 73: '', 79: '', 85: '', 89: ''}
234		)
235
236	1	if max_length != -1:
237	1	code = code[:max_length]
238
239	1	return code
240
241
242	1	def henry_early(word, max_length=3):
243		"""Calculate the early version of the Henry code for a word.
244
245		This is a wrapper for :py:meth:`HenryEarly.encode`.
246
247		Parameters
248		----------
249		word : str
250		The word to transform
251		max_length : int
252		The length of the code returned (defaults to 3)
253
254		Returns
255		-------
256		str
257		The early Henry code
258
259		Examples
260		--------
261		>>> henry_early('Marchand')
262		'MRC'
263		>>> henry_early('Beaulieu')
264		'BL'
265		>>> henry_early('Beaumont')
266		'BM'
267		>>> henry_early('Legrand')
268		'LGR'
269		>>> henry_early('Pelletier')
270		'PLT'
271
272		"""
273	1	return HenryEarly().encode(word, max_length)
274
275
276		if __name__ == '__main__':
277		import doctest
278
279		doctest.testmod()
280

chrislit / abydos

Push — master ( f43547...71985b )

abydos.phonetic._henry_early.henry_early() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like