Completed
Pull Request — master (#141)
by Chris
11:04
created

abydos.phonetic._spfc.SPFC.encode()   F

Complexity

Conditions 21

Size

Total Lines 162
Code Lines 70

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 61
CRAP Score 21

Importance

Changes 0
Metric Value
eloc 70
dl 0
loc 162
ccs 61
cts 61
cp 1
rs 0
c 0
b 0
f 0
cc 21
nop 2
crap 21

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex code like abydos.phonetic._spfc.SPFC.encode() often does a lot of different things. To break it down, we need to identify a cohesive component within the enclosing class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._spfc.
20
21
Standardized Phonetic Frequency Code (SPFC) algorithm
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34 1
from six.moves import range
35
36 1
from ._phonetic import Phonetic
37
38 1
__all__ = ['SPFC', 'spfc']
39
40
41 1
class SPFC(Phonetic):
    """Standardized Phonetic Frequency Code (SPFC).

    Standardized Phonetic Frequency Code is roughly Soundex-like.
    This implementation is based on page 19-21 of :cite:`Moore:1977`.

    :py:meth:`encode` is the public entry point; the remaining methods are
    private helpers that each carry out one stage of the algorithm.

    """

    # Table PF1: used for the first code digit (first letter of the last
    # name). Keys are code points so the dict can be passed to translate().
    _pf1 = dict(
        zip(
            (ord(char) for char in 'SZCKQVFPUWABLORDHIEMNXGJT'),
            '0011112222334445556666777',
        )
    )
    # Table PF2: used for the third, fourth, and fifth code digits.
    _pf2 = dict(
        zip(
            (ord(char) for char in 'SZCKQFPXABORDHIMNGJTUVWEL'),
            '0011122233445556677788899',
        )
    )
    # Table PF3: used for the second code digit when no multi-letter suffix
    # rule applies (single trailing letter of the last name).
    _pf3 = dict(
        zip(
            (ord(char) for char in 'BCKQVDTFLPGJXMNRSZAEHIOUWY'),
            '00000112223334456677777777',
        )
    )

    # Step 1 digraph simplifications, applied in this order.
    _substitutions = (
        ('DK', 'K'),
        ('DT', 'T'),
        ('SC', 'S'),
        ('KN', 'N'),
        ('MN', 'N'),
    )

    # Step 5 multi-letter suffix rules as (suffix, digit). ORDER MATTERS:
    # the first matching rule wins, and some 2-letter suffixes are tails of
    # 3-letter ones that must be tried first (e.g. 'STN' before 'TN', 'STR'
    # before 'TR').
    _suffix_rules = (
        ('STN', '8'),
        ('PRS', '8'),
        ('SN', '8'),
        ('STR', '9'),
        ('SR', '9'),
        ('TN', '9'),
        ('TD', '9'),
        ('DRS', '7'),
        ('TR', '7'),
        ('MN', '7'),
    )

    # Letters removed in step 3 (vowels plus H, W, and Y) from every
    # position except the first.
    _step3_dropped = frozenset('AEHIOUWY')

    # Message of the AttributeError raised for malformed input.
    _word_error = (
        'Word attribute must be a string with a space or period '
        'dividing the first and last names or a tuple/list '
        'consisting of the first and last names'
    )

    def encode(self, word):
        """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

        Args:
            word (str): The word to transform; either a string whose first
                and last names are divided by a period or a space, or a
                two-element tuple/list of (first name, last name)

        Returns:
            str: The SPFC value, or the empty string if ``word`` is falsy

        Raises:
            AttributeError: Word attribute must be a string with a space or
                period dividing the first and last names or a tuple/list
                consisting of the first and last names

        Examples:
            >>> pe = SPFC()
            >>> pe.encode('Christopher Smith')
            '01160'
            >>> pe.encode('Christopher Schmidt')
            '01160'
            >>> pe.encode('Niall Smith')
            '01660'
            >>> pe.encode('Niall Schmidt')
            '01660'

            >>> pe.encode('L.Smith')
            '01960'
            >>> pe.encode('R.Miller')
            '65490'

            >>> pe.encode(('L', 'Smith'))
            '01960'
            >>> pe.encode(('R', 'Miller'))
            '65490'

        """
        if not word:
            return ''

        names = [
            self._steps_one_to_three(self._normalize_name(name))
            for name in self._split_names(word)
        ]
        given, surname = names[0], names[1]
        code = ''

        # 4. The first digit of the code is obtained using PF1 and the first
        # letter of the name field. Remove this letter after coding.
        if surname:
            code += surname[0].translate(self._pf1)
            surname = surname[1:]

        # 5. Using the last letters of the name, use Table PF3 to obtain the
        # second digit of the code. Use as many letters as possible and remove
        # after coding.
        if surname:
            digit, surname = self._code_suffix(surname)
            code += digit

        # 6. The third digit is found using Table PF2 and the first character
        # of the first name. Remove after coding.
        if given:
            code += given[0].translate(self._pf2)

        # 7. The fourth digit is found using Table PF2 and the first character
        # of the name field. If no letters remain use zero. After coding remove
        # the letter.
        # 8. The fifth digit is found in the same manner as the fourth using
        # the remaining characters of the name field if any.
        # translate() maps each character to exactly one character, so the
        # result has at most two characters and is zero-padded on the right.
        code += surname[:2].translate(self._pf2).ljust(2, '0')

        return code

    def _split_names(self, word):
        """Split the input into its first-name and last-name parts.

        A string is split once on the first period; if that does not yield
        two parts, it is split once on the first space instead. Any other
        object that has ``__iter__`` and a length of two is returned as-is.

        Args:
            word (str): The (truthy) value passed to :py:meth:`encode`

        Returns:
            list: The two name parts, first name first

        Raises:
            AttributeError: The input is neither a string containing a
                period or space nor a two-element iterable

        """
        if isinstance(word, (str, text_type)):
            for separator in ('.', ' '):
                parts = word.split(separator, 1)
                if len(parts) == 2:
                    return parts
        elif hasattr(word, '__iter__') and len(word) == 2:
            return word
        raise AttributeError(self._word_error)

    @staticmethod
    def _normalize_name(name):
        """Strip, uppercase, and NFKD-normalize a single name part.

        Args:
            name (str): Raw name part

        Returns:
            str: The name with surrounding whitespace removed, 'ß' replaced
            by 'SS', uppercased, and converted to Unicode form NFKD

        """
        return unicode_normalize(
            'NFKD', text_type(name.strip().replace('ß', 'SS').upper())
        )

    def _steps_one_to_three(self, name):
        """Perform the first three steps of SPFC.

        Args:
            name (str): Name to transform

        Returns:
            str: Transformed name

        """
        # filter out non A-Z
        name = ''.join(char for char in name if char in self._uc_set)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for old, new in self._substitutions:
            name = name.replace(old, new)

        # 2. In the name field, replace multiple letters with a single
        # letter
        name = self._delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the
        # name field.
        if name:
            name = name[0] + ''.join(
                char for char in name[1:] if char not in self._step3_dropped
            )
        return name

    def _code_suffix(self, name):
        """Code the end of the last name (step 5) and strip what was coded.

        Args:
            name (str): Non-empty remainder of the last name after step 4

        Returns:
            tuple: ``(digit, rest)`` where ``digit`` is the second code digit
            and ``rest`` is ``name`` without the letters that were coded

        """
        for suffix, digit in self._suffix_rules:
            if name.endswith(suffix):
                return digit, name[: -len(suffix)]
        # No multi-letter rule matched: code the single last letter via PF3.
        return name[-1].translate(self._pf3), name[:-1]
237
238
239 1
def spfc(word):
    """Compute the Standardized Phonetic Frequency Code (SPFC) of a word.

    Convenience function that builds an :py:class:`SPFC` instance and
    delegates to :py:meth:`SPFC.encode`.

    Args:
        word (str): The word to transform

    Returns:
        str: The SPFC value

    Examples:
        >>> spfc('Christopher Smith')
        '01160'
        >>> spfc('Christopher Schmidt')
        '01160'
        >>> spfc('Niall Smith')
        '01660'
        >>> spfc('Niall Schmidt')
        '01660'

        >>> spfc('L.Smith')
        '01960'
        >>> spfc('R.Miller')
        '65490'

        >>> spfc(('L', 'Smith'))
        '01960'
        >>> spfc(('R', 'Miller'))
        '65490'

    """
    encoder = SPFC()
    return encoder.encode(word)
272
273
274
if __name__ == '__main__':
    # Run this module's doctests when it is executed as a script.
    from doctest import testmod

    testmod()
278