Completed
Pull Request — master (#141)
by Chris
13:03
created

abydos.phonetic._spfc.SPFC.encode()   F

Complexity

Conditions 21

Size

Total Lines 162
Code Lines 70

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 61
CRAP Score 21

Importance

Changes 0
Metric Value
eloc 70
dl 0
loc 162
ccs 61
cts 61
cp 1
rs 0
c 0
b 0
f 0
cc 21
nop 2
crap 21

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive fragment of the long method into its own well-named method.

Complexity

Complex classes like abydos.phonetic._spfc.SPFC.encode() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._spfc.
20
21
The phonetic._spfc module implements the Standardized Phonetic Frequency Code
22
(SPFC) algorithm.
23
"""
24
25 1
from __future__ import unicode_literals
26
27 1
from unicodedata import normalize as unicode_normalize
28
29 1
from six import text_type
30 1
from six.moves import range
31
32 1
from ._phonetic import Phonetic
33
34 1
# Public names of this module: the encoder class and its functional wrapper.
__all__ = ['SPFC', 'spfc']
35
36
37 1
class SPFC(Phonetic):
    """Standardized Phonetic Frequency Code (SPFC).

    Standardized Phonetic Frequency Code is roughly Soundex-like.
    This implementation is based on page 19-21 of :cite:`Moore:1977`.
    """

    # Translation tables (code point -> digit) for the PF1, PF2 and PF3
    # lookups used by steps 4-8 of the algorithm.
    _pf1 = dict(
        zip(map(ord, 'SZCKQVFPUWABLORDHIEMNXGJT'), '0011112222334445556666777')
    )
    _pf2 = dict(
        zip(map(ord, 'SZCKQFPXABORDHIMNGJTUVWEL'), '0011122233445556677788899')
    )
    _pf3 = dict(
        zip(
            map(ord, 'BCKQVDTFLPGJXMNRSZAEHIOUWY'),
            '00000112223334456677777777',
        )
    )

    # Step 1 digraph reductions, applied in this order.
    _substitutions = (
        ('DK', 'K'),
        ('DT', 'T'),
        ('SC', 'S'),
        ('KN', 'N'),
        ('MN', 'N'),
    )

    # Step 3 removes these letters everywhere except the first position.
    _dropped_letters = frozenset('AEHIOUWY')

    # Step 5 multi-letter endings and the digit each one yields. The first
    # matching entry wins, so 'STN' and 'STR' must precede 'TN' and 'TR'.
    _pf3_endings = (
        ('STN', '8'),
        ('PRS', '8'),
        ('SN', '8'),
        ('STR', '9'),
        ('SR', '9'),
        ('TN', '9'),
        ('TD', '9'),
        ('DRS', '7'),
        ('TR', '7'),
        ('MN', '7'),
    )

    _word_error = (
        'Word attribute must be a string with a space or period '
        'dividing the first and last names or a tuple/list '
        'consisting of the first and last names'
    )

    def _split_names(self, word):
        """Split the input into a [first name, last name] pair.

        Args:
            word (str or iterable): A string with a period or a space between
                the two names, or an iterable holding exactly two names

        Returns:
            list: The first and last names, in that order

        Raises:
            AttributeError: Word attribute must be a string with a space or
                period dividing the first and last names or a tuple/list
                consisting of the first and last names

        """
        if isinstance(word, (str, text_type)):
            # A period takes precedence over a space as the separator.
            for separator in ('.', ' '):
                names = word.split(separator, 1)
                if len(names) == 2:
                    return names
        elif hasattr(word, '__iter__'):
            # Materialise first so iterables without __len__ (generators)
            # are validated instead of failing with a TypeError.
            names = list(word)
            if len(names) == 2:
                return names
        raise AttributeError(self._word_error)

    def _steps_one_to_three(self, name):
        """Perform the first three steps of SPFC.

        Args:
            name (str): Name to transform

        Returns:
            str: Transformed name

        """
        # filter out non A-Z (_uc_set is not defined in this class;
        # presumably it is provided by Phonetic)
        name = ''.join(char for char in name if char in self._uc_set)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for old, new in self._substitutions:
            name = name.replace(old, new)

        # 2. In the name field, replace multiple letters with a single
        # letter
        name = self._delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the
        # name field.
        if name:
            name = name[0] + ''.join(
                char
                for char in name[1:]
                if char not in self._dropped_letters
            )
        return name

    def _code_ending(self, name):
        """Code the end of a non-empty name field (step 5).

        Args:
            name (str): Non-empty remainder of the last name

        Returns:
            tuple: The digit for the ending and the name with that ending
                removed

        """
        for ending, digit in self._pf3_endings:
            if name.endswith(ending):
                return digit, name[: -len(ending)]
        # No special ending: code the final letter with Table PF3.
        return name[-1].translate(self._pf3), name[:-1]

    def encode(self, word):
        """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

        Args:
            word (str): The word to transform

        Returns:
            str: The SPFC value

        Raises:
            AttributeError: Word attribute must be a string with a space or
                period dividing the first and last names or a tuple/list
                consisting of the first and last names

        Examples:
            >>> pe = SPFC()
            >>> pe.encode('Christopher Smith')
            '01160'
            >>> pe.encode('Christopher Schmidt')
            '01160'
            >>> pe.encode('Niall Smith')
            '01660'
            >>> pe.encode('Niall Schmidt')
            '01660'

            >>> pe.encode('L.Smith')
            '01960'
            >>> pe.encode('R.Miller')
            '65490'

            >>> pe.encode(('L', 'Smith'))
            '01960'
            >>> pe.encode(('R', 'Miller'))
            '65490'

        """
        if not word:
            return ''

        names = [
            unicode_normalize(
                'NFKD', text_type(name.strip().replace('ß', 'SS').upper())
            )
            for name in self._split_names(word)
        ]
        first, last = [self._steps_one_to_three(name) for name in names]

        code = ''

        # 4. The first digit of the code is obtained using PF1 and the first
        # letter of the name field. Remove this letter after coding.
        if last:
            code += last[0].translate(self._pf1)
            last = last[1:]

        # 5. Using the last letters of the name, use Table PF3 to obtain the
        # second digit of the code. Use as many letters as possible and remove
        # after coding.
        if last:
            digit, last = self._code_ending(last)
            code += digit

        # 6. The third digit is found using Table PF2 and the first character
        # of the first name.
        if first:
            code += first[0].translate(self._pf2)

        # 7. The fourth digit is found using Table PF2 and the first character
        # of the name field. If no letters remain use zero. After coding remove
        # the letter.
        # 8. The fifth digit is found in the same manner as the fourth using
        # the remaining characters of the name field if any.
        for _ in range(2):
            if last:
                code += last[0].translate(self._pf2)
                last = last[1:]
            else:
                code += '0'

        return code
233
234
235 1
def spfc(word):
    """Encode a name pair with the Standardized Phonetic Frequency Code.

    Convenience function that builds an :py:class:`SPFC` instance and
    delegates to :py:meth:`SPFC.encode`.

    Args:
        word (str): The word to transform

    Returns:
        str: The SPFC value

    Examples:
        >>> spfc('Christopher Smith')
        '01160'
        >>> spfc('Christopher Schmidt')
        '01160'
        >>> spfc('Niall Smith')
        '01660'
        >>> spfc('Niall Schmidt')
        '01660'

        >>> spfc('L.Smith')
        '01960'
        >>> spfc('R.Miller')
        '65490'

        >>> spfc(('L', 'Smith'))
        '01960'
        >>> spfc(('R', 'Miller'))
        '65490'

    """
    encoder = SPFC()
    return encoder.encode(word)
268
269
270
if __name__ == '__main__':
    # Run as a script: execute the doctest examples embedded in this module.
    import doctest

    doctest.testmod()
274