Test Failed
Push — master ( 23810f...afe14d )
by Chris
09:47
created

abydos.phonetic._henry_early.HenryEarly.encode()   F

Complexity

Conditions 56

Size

Total Lines 181
Code Lines 111

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 96
CRAP Score 56

Importance

Changes 0
Metric Value
cc 56
eloc 111
nop 3
dl 0
loc 181
ccs 96
cts 96
cp 1
crap 56
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

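Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.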

Complexity

Complex code units like the method abydos.phonetic._henry_early.HenryEarly.encode() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._henry_early.
20
21
an early version of Henry Code
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._phonetic import _Phonetic
36
37 1
__all__ = ['HenryEarly', 'henry_early']
38
39
40 1
class HenryEarly(_Phonetic):
    """Henry code, early version.

    The early version of Henry coding is given in :cite:`Legare:1972`. This is
    different from the later version defined in :cite:`Henry:1976`.

    Notes
    -----
    ``_uc_set`` and ``_uc_vy_set`` are read by this class but not defined
    here; they are presumably inherited from ``_Phonetic`` (the set of
    uppercase letters and the set of uppercase vowels plus Y, judging by
    their use) -- TODO confirm against ``_phonetic.py``.
    """

    # Uppercase consonant letters; Y is deliberately absent.
    _uc_c_set = set('BCDFGHJKLMNPQRSTVWXZ')
    # Word-initial two-letter vowel groups and the single vowel that
    # replaces each of them (Rule Ib3).
    _diph = {
        'AI': 'E',
        'AY': 'E',
        'EI': 'E',
        'AU': 'O',
        'OI': 'O',
        'OU': 'O',
        'EU': 'U',
    }
    # Consonants that always map to one fixed letter (Rule IIb).
    _simple = {'W': 'V', 'X': 'S', 'Z': 'S'}

    def encode(self, word, max_length=3):
        """Calculate the early version of the Henry code for a word.

        The word is uppercased, NFKD-normalized and reduced to the letters
        in ``self._uc_set``. Rule Ib then rewrites a leading vowel, Rule II
        recodes the letters one by one, Rule IIe trims the ending, and
        finally every vowel except a leading one is dropped.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The length of the code returned (defaults to 3); -1 disables
            truncation

        Returns
        -------
        str
            The early Henry code, or an empty string if no usable letters
            remain after filtering

        Examples
        --------
        >>> HenryEarly().encode('Marchand')
        'MRC'
        >>> HenryEarly().encode('Beaulieu')
        'BL'
        >>> HenryEarly().encode('Beaumont')
        'BM'
        >>> HenryEarly().encode('Legrand')
        'LGR'
        >>> HenryEarly().encode('Pelletier')
        'PLT'

        """
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = ''.join(c for c in word if c in self._uc_set)

        if not word:
            return ''

        # Rule Ia seems to be covered entirely in II

        word = self._henry_initial_vowel(word)
        code = self._henry_body(word)
        code = self._henry_ending(code)

        # Drop non-initial vowels (the keys are the code points of
        # A, E, I, O, U and Y)
        code = code[:1] + code[1:].translate(
            {65: '', 69: '', 73: '', 79: '', 85: '', 89: ''}
        )

        if max_length != -1:
            code = code[:max_length]

        return code

    def _henry_initial_vowel(self, word):
        """Apply Rule Ib: rewrite the first letter(s) of a vowel-initial word.

        Parameters
        ----------
        word : str
            A non-empty, already filtered uppercase word

        Returns
        -------
        str
            The word with its leading vowel or vowel pair recoded; the word
            is returned unchanged if it does not start with a member of
            ``self._uc_vy_set`` or if no sub-rule alters it

        """
        head = word[0]
        if head not in self._uc_vy_set:
            return word

        consonants = self._uc_c_set
        second = word[1:2]
        third = word[2:3]

        # Ib1
        if (second in consonants - {'M', 'N'} and third in consonants) or (
            second in consonants and third not in consonants
        ):
            if head == 'Y':
                head = 'I'
        # Ib2
        elif second in {'M', 'N'} and third in consonants:
            if head == 'E':
                head = 'A'
            elif head in {'I', 'U', 'Y'}:
                head = 'E'
        # Ib3
        elif word[:2] in self._diph:
            # Both letters of the pair collapse into a single vowel.
            return self._diph[word[:2]] + word[2:]
        # Ib4
        elif second in self._uc_vy_set and head == 'Y':
            head = 'I'

        return head + word[1:]

    def _henry_body(self, word):
        """Apply Rule II to each letter and return the untrimmed code.

        Parameters
        ----------
        word : str
            The word as returned by Rule Ib processing

        Returns
        -------
        str
            The concatenation of the code fragments emitted for each letter

        """
        vowels = self._uc_vy_set
        consonants = self._uc_c_set
        # Loop-invariant, so built once per call rather than per letter.
        non_liquids = consonants - {'L', 'R'}

        pieces = []
        skip = 0
        for pos, char in enumerate(word):
            # Letters already consumed by a multi-letter rule are passed over.
            if skip:
                skip -= 1
                continue

            nxch = word[pos + 1 : pos + 2]
            prev = word[pos - 1 : pos]

            if char in vowels:
                pieces.append(char)
            # IIc
            elif char == nxch:
                # A doubled letter is emitted once; its twin is skipped.
                skip = 1
                pieces.append(char)
            elif word[pos : pos + 2] in {'CQ', 'DT', 'SC'}:
                # The first letter of these pairs is dropped.
                continue
            # IIb
            elif char in self._simple:
                pieces.append(self._simple[char])
            elif char in {'C', 'G', 'P', 'Q', 'S'}:
                piece, skip = self._henry_cgpqs(word, pos)
                pieces.append(piece)
            # IId
            elif char == 'H' and prev in consonants:
                continue
            elif char in non_liquids and nxch in non_liquids:
                continue
            elif char == 'L' and nxch in {'M', 'N'}:
                continue
            elif char in {'M', 'N'} and prev in vowels and nxch in consonants:
                continue
            # IIa
            else:
                pieces.append(char)

        return ''.join(pieces)

    def _henry_cgpqs(self, word, pos):
        """Encode the context-dependent letter C, G, P, Q or S at ``pos``.

        Parameters
        ----------
        word : str
            The word being encoded
        pos : int
            Index of the letter to encode; ``word[pos]`` must be one of
            C, G, P, Q or S

        Returns
        -------
        tuple
            ``(piece, skip)``: the code fragment to emit (possibly empty)
            and the number of following letters the caller must skip

        """
        char = word[pos]
        nxch = word[pos + 1 : pos + 2]

        if char == 'C':
            if nxch in {'A', 'O', 'U', 'L', 'R'}:
                return 'K', 0
            if nxch in {'E', 'I', 'Y'}:
                return 'S', 0
            if nxch == 'H' and word[pos + 2 : pos + 3] not in self._uc_vy_set:
                # CHR, CHL, etc.
                return 'K', 0
            # CH + vowel, and C before anything else (or word-final)
            return 'C', 0

        if char == 'G':
            if nxch in {'A', 'O', 'U', 'L', 'R'}:
                return 'G', 0
            if nxch in {'E', 'I', 'Y'}:
                return 'J', 0
            if nxch == 'N':
                return 'N', 0
            # G in any other position emits nothing.
            return '', 0

        if char == 'P':
            return ('F' if nxch == 'H' else 'P'), 0

        if char == 'Q':
            if word[pos + 1 : pos + 3] in {'UE', 'UI', 'UY'}:
                return 'G', 0
            # QUA, QUO, etc.
            return 'K', 0

        # S...
        # Longest prefix first, so SAINTE wins over SAINT and STE over ST;
        # the rest of the matched prefix is skipped by the caller.
        for prefix in ('SAINTE', 'SAINT', 'STE', 'ST'):
            if word.startswith(prefix, pos):
                return 'X', len(prefix) - 1
        if nxch in self._uc_c_set:
            return '', 0
        return 'S', 0

    def _henry_ending(self, code):
        """Apply Rule IIe: trim the end of the untrimmed code.

        At most one trimming rule is applied; the first that matches wins.

        Parameters
        ----------
        code : str
            The code produced by Rule II, vowels still included

        Returns
        -------
        str
            The code with its ending shortened, or unchanged if no rule
            matches

        """
        # IIe1
        if code[-4:] in {'AULT', 'EULT', 'OULT'}:
            return code[:-2]
        # The following are blocked by rules above
        # elif code[-4:-3] in _vows and code[-3:] == 'MPS':
        #    code = code[:-3]
        # elif code[-3:-2] in _vows and code[-2:] in {'MB', 'MP', 'ND',
        #                                             'NS', 'NT'}:
        #    code = code[:-2]
        if code[-2:-1] == 'R' and code[-1:] in self._uc_c_set:
            return code[:-1]
        # IIe2
        if code[-2:-1] in self._uc_vy_set and code[-1:] in {
            'D',
            'M',
            'N',
            'S',
            'T',
        }:
            return code[:-1]
        if code[-2:] == 'ER':
            return code[:-1]
        return code
240
241
242 1
def henry_early(word, max_length=3):
    """Return the early Henry code of a word.

    Convenience function: it creates a :py:class:`HenryEarly` instance and
    delegates to :py:meth:`HenryEarly.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The length of the code returned (defaults to 3)

    Returns
    -------
    str
        The early Henry code

    Examples
    --------
    >>> henry_early('Marchand')
    'MRC'
    >>> henry_early('Beaulieu')
    'BL'
    >>> henry_early('Beaumont')
    'BM'
    >>> henry_early('Legrand')
    'LGR'
    >>> henry_early('Pelletier')
    'PLT'

    """
    encoder = HenryEarly()
    return encoder.encode(word, max_length)
274
275
276
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()
280