Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._spfc.SPFC.encode()   F

Complexity

Conditions 21

Size

Total Lines 175
Code Lines 70

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 61
CRAP Score 21

Importance

Changes 0
Metric Value
cc 21
eloc 70
nop 2
dl 0
loc 175
ccs 61
cts 61
cp 1
crap 21
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex classes like abydos.phonetic._spfc.SPFC.encode() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._spfc.
20
21
Standardized Phonetic Frequency Code (SPFC) algorithm
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34 1
from six.moves import range
35
36 1
from ._phonetic import _Phonetic
37
38 1
# Names exported by ``from ... import *``: the encoder class and its
# function-style wrapper, both defined in this module.
__all__ = ['SPFC', 'spfc']
39
40
41 1
class SPFC(_Phonetic):
    """Standardized Phonetic Frequency Code (SPFC).

    Standardized Phonetic Frequency Code is roughly Soundex-like.
    This implementation is based on page 19-21 of :cite:`Moore:1977`.

    The encoding is split into small private helpers (`_split_word`,
    `_prepare_name`, `_encode_suffix`) so that `encode` itself only
    assembles the code digits.
    """

    # Table PF1: first letter of the last name -> first code digit (step 4).
    _pf1 = dict(
        zip(
            (ord(_) for _ in 'SZCKQVFPUWABLORDHIEMNXGJT'),
            '0011112222334445556666777',
        )
    )
    # Table PF2: used for the third, fourth and fifth code digits
    # (steps 6-8).
    _pf2 = dict(
        zip(
            (ord(_) for _ in 'SZCKQFPXABORDHIMNGJTUVWEL'),
            '0011122233445556677788899',
        )
    )
    # Table PF3: single-letter fallback for the second code digit (step 5),
    # used when none of the multi-letter suffixes below matches.
    _pf3 = dict(
        zip(
            (ord(_) for _ in 'BCKQVDTFLPGJXMNRSZAEHIOUWY'),
            '00000112223334456677777777',
        )
    )

    # Step 1 digraph simplifications, applied in this order.
    _substitutions = (
        ('DK', 'K'),
        ('DT', 'T'),
        ('SC', 'S'),
        ('KN', 'N'),
        ('MN', 'N'),
    )

    # Step 5 multi-letter suffixes and the digit each one yields. The order
    # is significant: the first matching entry wins, so 'STN' must precede
    # 'TN' and 'STR' must precede 'TR'.
    _suffix_codes = (
        ('STN', '8'),
        ('PRS', '8'),
        ('SN', '8'),
        ('STR', '9'),
        ('SR', '9'),
        ('TN', '9'),
        ('TD', '9'),
        ('DRS', '7'),
        ('TR', '7'),
        ('MN', '7'),
    )

    # Letters dropped in step 3 (everywhere except the first position).
    _dropped_letters = frozenset('AEHIOUWY')

    @staticmethod
    def _split_word(word):
        """Split the input into its first-name and last-name parts.

        Parameters
        ----------
        word : str or tuple or list
            Either a string containing a period or a space between the
            first and last names (a period is tried first), or a
            two-element iterable holding the first and last names

        Returns
        -------
        list or tuple
            The two name parts; a two-element iterable is returned as is

        Raises
        ------
        AttributeError
            Word attribute must be a string with a space or period dividing
            the first and last names or a tuple/list consisting of the
            first and last names

        """
        if isinstance(word, (str, text_type)):
            for separator in ('.', ' '):
                parts = word.split(separator, 1)
                if len(parts) == 2:
                    return parts
        elif hasattr(word, '__iter__') and len(word) == 2:
            return word

        raise AttributeError(
            'Word attribute must be a string with a space or period '
            'dividing the first and last names or a tuple/list '
            'consisting of the first and last names'
        )

    def _prepare_name(self, name):
        """Perform the first three steps of SPFC on a single name.

        Parameters
        ----------
        name : str
            Upper-cased, normalized name to transform

        Returns
        -------
        str
            Transformed name

        """
        # Keep only the characters contained in the inherited _uc_set.
        name = ''.join(char for char in name if char in self._uc_set)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for old, new in self._substitutions:
            name = name.replace(old, new)

        # 2. In the name field, replace multiple letters with a single
        # letter
        name = self._delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the
        # name field.
        if name:
            name = name[0] + ''.join(
                char for char in name[1:] if char not in self._dropped_letters
            )
        return name

    def _encode_suffix(self, name):
        """Code the end of a non-empty last name (step 5).

        Parameters
        ----------
        name : str
            Non-empty remainder of the last name

        Returns
        -------
        tuple
            The code digit and the name with the coded letters removed

        """
        for suffix, digit in self._suffix_codes:
            if name.endswith(suffix):
                return digit, name[: -len(suffix)]
        # No multi-letter suffix matched: code only the last letter via PF3.
        return name[-1].translate(self._pf3), name[:-1]

    def encode(self, word):
        """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

        Parameters
        ----------
        word : str
            The word to transform: a string with a period or space between
            the first and last names, or a tuple/list of the two names

        Returns
        -------
        str
            The SPFC value; the empty string if `word` is empty

        Raises
        ------
        AttributeError
            Word attribute must be a string with a space or period dividing the
            first and last names or a tuple/list consisting of the first and
            last names

        Examples
        --------
        >>> pe = SPFC()
        >>> pe.encode('Christopher Smith')
        '01160'
        >>> pe.encode('Christopher Schmidt')
        '01160'
        >>> pe.encode('Niall Smith')
        '01660'
        >>> pe.encode('Niall Schmidt')
        '01660'

        >>> pe.encode('L.Smith')
        '01960'
        >>> pe.encode('R.Miller')
        '65490'

        >>> pe.encode(('L', 'Smith'))
        '01960'
        >>> pe.encode(('R', 'Miller'))
        '65490'

        """
        if not word:
            return ''

        # Normalize every part first, then run steps 1-3 on each of them.
        names = [
            unicode_normalize(
                'NFKD', text_type(_.strip().replace('ß', 'SS').upper())
            )
            for _ in self._split_word(word)
        ]
        names = [self._prepare_name(_) for _ in names]
        first, last = names[0], names[1]

        code = ''

        # 4. The first digit of the code is obtained using PF1 and the first
        # letter of the name field. Remove this letter after coding.
        if last:
            code += last[0].translate(self._pf1)
            last = last[1:]

        # 5. Using the last letters of the name, use Table PF3 to obtain the
        # second digit of the code. Use as many letters as possible and remove
        # after coding.
        if last:
            digit, last = self._encode_suffix(last)
            code += digit

        # 6. The third digit is found using Table PF2 and the first character
        # of the first name. Remove after coding.
        if first:
            code += first[0].translate(self._pf2)

        # 7. The fourth digit is found using Table PF2 and the first character
        # of the name field. If no letters remain use zero. After coding remove
        # the letter.
        # 8. The fifth digit is found in the same manner as the fourth using
        # the remaining characters of the name field if any.
        for _ in range(2):
            if last:
                code += last[0].translate(self._pf2)
                last = last[1:]
            else:
                code += '0'

        return code
250
251
252 1
def spfc(word):
    """Encode a name with the Standardized Phonetic Frequency Code (SPFC).

    Convenience function: builds an :py:class:`SPFC` instance and delegates
    to :py:meth:`SPFC.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The SPFC value

    Examples
    --------
    >>> spfc('Christopher Smith')
    '01160'
    >>> spfc('Christopher Schmidt')
    '01160'
    >>> spfc('Niall Smith')
    '01660'
    >>> spfc('Niall Schmidt')
    '01660'

    >>> spfc('L.Smith')
    '01960'
    >>> spfc('R.Miller')
    '65490'

    >>> spfc(('L', 'Smith'))
    '01960'
    >>> spfc(('R', 'Miller'))
    '65490'

    """
    encoder = SPFC()
    return encoder.encode(word)
290
291
292
if __name__ == '__main__':
    # Running the module as a script executes the doctests embedded in it.
    from doctest import testmod

    testmod()
296