Test Failed
Push — master ( 64abe2...a464fa )
by Chris
04:02 queued 11s
created

abydos.phonetic.dolby.dolby()   F

Complexity

Conditions 32

Size

Total Lines 181
Code Lines 86

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 32
eloc 86
nop 4
dl 0
loc 181
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

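Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are present, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.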

Complexity

Complex code units like abydos.phonetic.dolby.dolby() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. For a class, a common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes; for a long function such as this one, look for groups of consecutive statements that serve a single purpose.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.phonetic.dolby.
20
21
The phonetic.dolby module implements the Dolby Code algorithm.
22
"""
23
24
from __future__ import unicode_literals
25
26
from unicodedata import normalize as unicode_normalize
27
28
from six import text_type
29
30
from . import _delete_consecutive_repeats
31
32
# Public API: ``dolby`` is the only name exported by a star-import of this module.
__all__ = ['dolby']
33
34
35
# Letters the Dolby rules treat as vowels (note that Y is included).
_DOLBY_VOWELS = frozenset('AEIOUY')
# Letters that may directly precede a K without being dropped by Rule 4.
_DOLBY_KEEP_BEFORE_K = _DOLBY_VOWELS | frozenset('LNR')
# The only characters that survive normalization.
_DOLBY_ALPHABET = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# Two-letter clusters whose second letter is removed by Rule 2.
_DOLBY_CLUSTERS = frozenset(('DT', 'LD', 'ND', 'NT', 'RC', 'RD', 'RT', 'SC',
                             'SK', 'ST'))
# Plain substring rewrites of Rule 3, applied in order before the CH pass.
_DOLBY_PRE_CH_REWRITES = (('X', 'KS'), ('CE', 'SE'), ('CI', 'SI'),
                          ('CY', 'SI'),
                          # not in the rule set, but they seem to have
                          # intended it
                          ('TCH', 'CH'))
# Plain substring rewrites of Rule 3, applied in order after the CH pass.
_DOLBY_POST_CH_REWRITES = (('C', 'K'), ('Z', 'S'), ('WR', 'R'), ('DG', 'G'),
                           ('QU', 'K'), ('T', 'D'), ('PH', 'F'))


def _dolby_normalize(word):
    """Uppercase, NFKD-decompose, expand eszett and drop non-A-Z chars."""
    word = unicode_normalize('NFKD', text_type(word.upper()))
    word = word.replace('ß', 'SS')
    return ''.join(char for char in word if char in _DOLBY_ALPHABET)


def _dolby_rewrite_mac_prefix(word):
    """Rule 1 (FL2): rewrite a leading MCG/MAG/MAC/MC as MK."""
    if word[:3] in {'MCG', 'MAG', 'MAC'}:
        return 'MK' + word[3:]
    if word[:2] == 'MC':
        return 'MK' + word[2:]
    return word


def _dolby_drop_cluster_tails(word):
    """Rule 2 (FL3): drop the 2nd letter of listed clusters, right to left."""
    pos = len(word) - 2
    while pos >= 0:
        if word[pos:pos + 2] in _DOLBY_CLUSTERS:
            # Stay on the same position: the letter that slides in may form
            # another cluster with the retained one.
            word = word[:pos + 1] + word[pos + 2:]
        else:
            pos -= 1
    return word


def _dolby_transcribe(word):
    """Rule 3 (FL4): apply the letter and digraph transcriptions in order.

    Although the rule indicates "after the first letter", the test cases make
    it clear that these apply to the first letter also.
    """
    for old, new in _DOLBY_PRE_CH_REWRITES:
        word = word.replace(old, new)

    # A non-initial CH that does not follow a vowel becomes SH.
    pos = word.find('CH', 1)
    while pos != -1:
        if word[pos - 1:pos] not in _DOLBY_VOWELS:
            word = word[:pos] + 'S' + word[pos + 1:]
        pos = word.find('CH', pos + 1)

    for old, new in _DOLBY_POST_CH_REWRITES:
        word = word.replace(old, new)
    return word


def _dolby_drop_before_k(word):
    """Rule 4 (FL5): drop the letter before K unless a vowel, L, N or R.

    The letter at index 0 is never dropped (the check requires the K to sit
    at index 2 or later).
    """
    pos = word.find('K', 0)
    while pos != -1:
        if pos > 1 and word[pos - 1:pos] not in _DOLBY_KEEP_BEFORE_K:
            word = word[:pos - 1] + word[pos:]
            # The K itself has shifted one position to the left.
            pos -= 1
        pos = word.find('K', pos + 1)
    return word


def _dolby_reduce_pf_gh(word):
    """Rule 6 (FL8): simplify initial/final PF and final/internal GH."""
    if word[:2] == 'PF':
        word = word[1:]
    if word[-2:] == 'PF':
        word = word[:-1]
    elif word[-2:] == 'GH':
        # Final GH becomes F after a vowel and G otherwise.
        if word[-3:-2] in _DOLBY_VOWELS:
            word = word[:-2] + 'F'
        else:
            word = word[:-2] + 'G'
    return word.replace('GH', '')


def _dolby_mark_vowels(word, vowel_budget, keep_vowels, vowel_char):
    """Rules 7-9 (FL10-FL12): mark vowels and drop non-initial W and H.

    Only the first ``vowel_budget`` vowels are replaced by ``vowel_char``
    (later ones are dropped) unless ``keep_vowels`` is true, in which case
    every vowel is replaced by the marker.
    """
    pieces = []
    for pos, char in enumerate(word):
        if char in _DOLBY_VOWELS:
            if vowel_budget or keep_vowels:
                pieces.append(vowel_char)
                vowel_budget -= 1
        elif pos > 0 and char in {'W', 'H'}:
            continue
        else:
            pieces.append(char)
    return ''.join(pieces)


def _dolby_fit_length(code, max_length, keep_vowels, vowel_char):
    """Rules FL13-FL16: trim or right-pad ``code`` to ``max_length`` chars."""
    # Rule FL13
    if len(code) > max_length and code[-1:] == 'S':
        code = code[:-1]

    if keep_vowels:
        code = code[:max_length]
    else:
        # Rule FL14
        code = code[:max_length + 2]
        # Rule FL15: each pass keeps only as many leading vowel markers as
        # there are surplus characters, then cuts one character of slack.
        while len(code) > max_length:
            vowels = len(code) - max_length
            excess = vowels - 1
            kept = []
            for char in code:
                if char == vowel_char:
                    if vowels:
                        kept.append(char)
                        vowels -= 1
                else:
                    kept.append(char)
            code = ''.join(kept)[:max_length + excess]

    # Rule FL16
    return code + ' ' * (max_length - len(code))


def dolby(word, max_length=-1, keep_vowels=False, vowel_char='*'):
    r"""Return the Dolby Code of a name.

    This follows "A Spelling Equivalent Abbreviation Algorithm For Personal
    Names" from :cite:`Dolby:1970` and :cite:`Cunningham:1969`.

    The word is normalized to A-Z, then passed through the rule groups in
    order (each implemented by a private ``_dolby_*`` helper); the rules
    labelled FL6, FL9 and FL13-FL16 are applied only in fixed-length mode.

    :param word: the word to encode
    :param max_length: maximum length of the returned Dolby code -- this also
        activates the fixed-length code mode if it is greater than 0
    :param keep_vowels: if True, retains all vowel markers
    :param vowel_char: the vowel marker character (default to \*)
    :returns: the Dolby Code; in fixed-length mode it is right-padded with
        spaces to exactly ``max_length`` characters
    :rtype: str

    >>> dolby('Hansen')
    'H*NSN'
    >>> dolby('Larsen')
    'L*RSN'
    >>> dolby('Aagaard')
    '*GR'
    >>> dolby('Braaten')
    'BR*DN'
    >>> dolby('Sandvik')
    'S*NVK'
    >>> dolby('Hansen', max_length=6)
    'H*NS*N'
    >>> dolby('Larsen', max_length=6)
    'L*RS*N'
    >>> dolby('Aagaard', max_length=6)
    '*G*R  '
    >>> dolby('Braaten', max_length=6)
    'BR*D*N'
    >>> dolby('Sandvik', max_length=6)
    'S*NF*K'

    >>> dolby('Smith')
    'SM*D'
    >>> dolby('Waters')
    'W*DRS'
    >>> dolby('James')
    'J*MS'
    >>> dolby('Schmidt')
    'SM*D'
    >>> dolby('Ashcroft')
    '*SKRFD'
    >>> dolby('Smith', max_length=6)
    'SM*D  '
    >>> dolby('Waters', max_length=6)
    'W*D*RS'
    >>> dolby('James', max_length=6)
    'J*M*S '
    >>> dolby('Schmidt', max_length=6)
    'SM*D  '
    >>> dolby('Ashcroft', max_length=6)
    '*SKRFD'
    """
    fixed_length = max_length > 0

    # uppercase, normalize, decompose, and filter non-A-Z out
    word = _dolby_normalize(word)

    # Rules 1-4 (FL2-FL5)
    word = _dolby_rewrite_mac_prefix(word)
    word = _dolby_drop_cluster_tails(word)
    word = _dolby_transcribe(word)
    word = _dolby_drop_before_k(word)

    # Rule FL6
    if fixed_length and word[-1:] == 'E':
        word = word[:-1]

    # Rule 5 (FL7)
    word = _delete_consecutive_repeats(word)

    # Rule 6 (FL8)
    word = _dolby_reduce_pf_gh(word)

    # Rule FL9
    if fixed_length:
        word = word.replace('V', 'F')

    # Rules 7-9 (FL10-FL12): fixed-length mode may keep a second vowel marker
    code = _dolby_mark_vowels(word, 2 if fixed_length else 1, keep_vowels,
                              vowel_char)

    # Rules FL13-FL16
    if fixed_length:
        code = _dolby_fit_length(code, max_length, keep_vowels, vowel_char)

    return code
216
217
218
if __name__ == '__main__':
    # When run directly as a script, execute the docstring examples as tests.
    import doctest

    doctest.testmod()
221