Test Failed
Push — master ( 64abe2...a464fa )
by Chris
04:02 queued 11s
created

abydos.phonetic.spfc.spfc()   F

Complexity

Conditions 21

Size

Total Lines 147
Code Lines 81

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 21
eloc 81
nop 1
dl 0
loc 147
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex classes like abydos.phonetic.spfc.spfc() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.phonetic.spfc.
20
21
The phonetic.spfc module implements the Standardized Phonetic Frequency Code
22
(SPFC) algorithm.
23
"""
24
25
from __future__ import unicode_literals
26
27
from unicodedata import normalize as unicode_normalize
28
29
from six import text_type
30
from six.moves import range
31
32
from . import _delete_consecutive_repeats
33
34
__all__ = ['spfc']
35
36
37
# Lookup tables PF1, PF2 and PF3 used by spfc(). They are keyed by code point
# so they can be handed straight to translate(), and are built once at import
# time rather than on every call.
_SPFC_PF1 = dict(zip((ord(char) for char in 'SZCKQVFPUWABLORDHIEMNXGJT'),
                     '0011112222334445556666777'))
_SPFC_PF2 = dict(zip((ord(char) for char in 'SZCKQFPXABORDHIMNGJTUVWEL'),
                     '0011122233445556677788899'))
_SPFC_PF3 = dict(zip((ord(char) for char in 'BCKQVDTFLPGJXMNRSZAEHIOUWY'),
                     '00000112223334456677777777'))

# Step 1 digraph simplifications, applied in this order.
_SPFC_SUBSTITUTIONS = (('DK', 'K'), ('DT', 'T'), ('SC', 'S'), ('KN', 'N'),
                       ('MN', 'N'))

# Step 5 multi-letter endings and the digit each one yields. The order is
# significant: the first matching ending wins, so 'STN' must be tried before
# 'TN' and 'STR' before 'TR'.
_SPFC_SUFFIX_CODES = (('STN', '8'), ('PRS', '8'), ('SN', '8'),
                      ('STR', '9'), ('SR', '9'), ('TN', '9'), ('TD', '9'),
                      ('DRS', '7'), ('TR', '7'), ('MN', '7'))

_SPFC_LETTERS = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
_SPFC_DROPPED = frozenset('AEHIOUWY')

_SPFC_WORD_ERROR = ('word attribute must be a string with a space ' +
                    'or period dividing the first and last names ' +
                    'or a tuple/list consisting of the first and ' +
                    'last names')


def _spfc_split_names(word):
    """Return the [first name, last name] pair held in word.

    A string is split once on the first period or, failing that, on the
    first space. Any other iterable must contain exactly two items.

    :raises AttributeError: if word cannot be divided into two names
    """
    if isinstance(word, (str, text_type)):
        for separator in ('.', ' '):
            names = word.split(separator, 1)
            if len(names) == 2:
                return names
    elif hasattr(word, '__iter__'):
        # Materialise first so that iterables without len() (generators,
        # for instance) are validated instead of failing with a TypeError.
        names = list(word)
        if len(names) == 2:
            return names
    raise AttributeError(_SPFC_WORD_ERROR)


def _spfc_steps_one_to_three(name):
    """Perform the first three steps of SPFC on a single name."""
    # filter out non A-Z
    name = ''.join(char for char in name if char in _SPFC_LETTERS)

    # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
    # and MN to N
    for old, new in _SPFC_SUBSTITUTIONS:
        name = name.replace(old, new)

    # 2. In the name field, replace multiple letters with a single letter
    name = _delete_consecutive_repeats(name)

    # 3. Remove vowels, W, H, and Y, but keep the first letter in the name
    # field.
    if name:
        name = name[0] + ''.join(char for char in name[1:]
                                 if char not in _SPFC_DROPPED)
    return name


def _spfc_code_ending(name):
    """Return (digit, remainder) for the ending of a non-empty name (step 5).

    The longest-priority ending listed in _SPFC_SUFFIX_CODES is consumed if
    one matches; otherwise only the final letter is coded through PF3.
    """
    for suffix, digit in _SPFC_SUFFIX_CODES:
        if name.endswith(suffix):
            return digit, name[:-len(suffix)]
    return name[-1].translate(_SPFC_PF3), name[:-1]


def spfc(word):
    """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

    Standardized Phonetic Frequency Code is roughly Soundex-like.
    This implementation is based on page 19-21 of :cite:`Moore:1977`.

    :param str word: the word to transform; either a string in which a
        period or a space divides the first name from the last name, or a
        two-item iterable of (first name, last name)
    :returns: the SPFC value, or an empty string if word is empty
    :rtype: str
    :raises AttributeError: if word cannot be divided into exactly two names

    >>> spfc('Christopher Smith')
    '01160'
    >>> spfc('Christopher Schmidt')
    '01160'
    >>> spfc('Niall Smith')
    '01660'
    >>> spfc('Niall Schmidt')
    '01660'

    >>> spfc('L.Smith')
    '01960'
    >>> spfc('R.Miller')
    '65490'

    >>> spfc(('L', 'Smith'))
    '01960'
    >>> spfc(('R', 'Miller'))
    '65490'
    """
    if not word:
        return ''

    # Upper-case and decompose (NFKD) so that accented letters reduce to a
    # base letter plus combining marks; the marks are then discarded by the
    # A-Z filter in steps one to three.
    names = [unicode_normalize('NFKD', text_type(name.strip()
                                                 .replace('ß', 'SS')
                                                 .upper()))
             for name in _spfc_split_names(word)]
    first, last = [_spfc_steps_one_to_three(name) for name in names]

    code = ''

    # 4. The first digit of the code is obtained using PF1 and the first letter
    # of the name field. Remove this letter after coding.
    if last:
        code += last[0].translate(_SPFC_PF1)
        last = last[1:]

    # 5. Using the last letters of the name, use Table PF3 to obtain the
    # second digit of the code. Use as many letters as possible and remove
    # after coding.
    if last:
        digit, last = _spfc_code_ending(last)
        code += digit

    # 6. The third digit is found using Table PF2 and the first character of
    # the first name. Remove after coding.
    if first:
        code += first[0].translate(_SPFC_PF2)

    # 7. The fourth digit is found using Table PF2 and the first character of
    # the name field. If no letters remain use zero. After coding remove the
    # letter.
    # 8. The fifth digit is found in the same manner as the fourth using the
    # remaining characters of the name field if any.
    # Both digits are produced at once: code up to two remaining letters and
    # pad with zeros for any that are missing.
    code += last[:2].translate(_SPFC_PF2).ljust(2, '0')

    return code
184
185
186
if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod
    testmod()
189