Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._dolby.dolby()   A

Complexity

Conditions 1

Size

Total Lines 68
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 4
dl 0
loc 68
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._dolby.
20
21
Dolby Code
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._phonetic import _Phonetic
36
37 1
# Public API of this module: the encoder class and its functional wrapper.
__all__ = ['Dolby', 'dolby']
38
39
40 1
class Dolby(_Phonetic):
    """Dolby Code.

    This follows "A Spelling Equivalent Abbreviation Algorithm For Personal
    Names" from :cite:`Dolby:1970` and :cite:`Cunningham:1969`.

    :py:meth:`encode` is the public entry point; each ``_dolby_*`` helper
    implements one stage of the rule set and is not meant to be called
    directly.
    """

    # Rule 2 (FL3): letter pairs whose second letter is dropped.
    _dolby_fl3_pairs = frozenset(
        ('DT', 'LD', 'ND', 'NT', 'RC', 'RD', 'RT', 'SC', 'SK', 'ST')
    )

    def encode(self, word, max_length=-1, keep_vowels=False, vowel_char='*'):
        r"""Return the Dolby Code of a name.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            Maximum length of the returned Dolby code -- this also activates
            the fixed-length code mode if it is greater than 0
        keep_vowels : bool
            If True, retains all vowel markers
        vowel_char : str
            The vowel marker character (default to \*)

        Returns
        -------
        str
            The Dolby Code; in fixed-length mode it is padded with spaces up
            to ``max_length``

        Examples
        --------
        >>> pe = Dolby()
        >>> pe.encode('Hansen')
        'H*NSN'
        >>> pe.encode('Larsen')
        'L*RSN'
        >>> pe.encode('Aagaard')
        '*GR'
        >>> pe.encode('Braaten')
        'BR*DN'
        >>> pe.encode('Sandvik')
        'S*NVK'
        >>> pe.encode('Hansen', max_length=6)
        'H*NS*N'
        >>> pe.encode('Larsen', max_length=6)
        'L*RS*N'
        >>> pe.encode('Aagaard', max_length=6)
        '*G*R  '
        >>> pe.encode('Braaten', max_length=6)
        'BR*D*N'
        >>> pe.encode('Sandvik', max_length=6)
        'S*NF*K'

        >>> pe.encode('Smith')
        'SM*D'
        >>> pe.encode('Waters')
        'W*DRS'
        >>> pe.encode('James')
        'J*MS'
        >>> pe.encode('Schmidt')
        'SM*D'
        >>> pe.encode('Ashcroft')
        '*SKRFD'
        >>> pe.encode('Smith', max_length=6)
        'SM*D  '
        >>> pe.encode('Waters', max_length=6)
        'W*D*RS'
        >>> pe.encode('James', max_length=6)
        'J*M*S '
        >>> pe.encode('Schmidt', max_length=6)
        'SM*D  '
        >>> pe.encode('Ashcroft', max_length=6)
        '*SKRFD'

        """
        # uppercase, normalize, decompose, and filter out everything that is
        # not in self._uc_set
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')
        word = ''.join(c for c in word if c in self._uc_set)

        # a positive max_length switches on the fixed-length rule variants
        fixed_length = max_length > 0

        word = self._dolby_respell(word)
        word = self._dolby_reduce(word, fixed_length)
        code = self._dolby_abbreviate(
            word, fixed_length, keep_vowels, vowel_char
        )
        if fixed_length:
            code = self._dolby_fit(code, max_length, keep_vowels, vowel_char)
        return code

    def _dolby_respell(self, word):
        """Apply the spelling-equivalence Rules 1-4 (FL2-FL5) to word.

        Parameters
        ----------
        word : str
            The uppercased, filtered word

        Returns
        -------
        str
            The respelled word

        """
        vowels = self._uc_vy_set

        # Rule 1 (FL2)
        if word[:3] in {'MCG', 'MAG', 'MAC'}:
            word = 'MK' + word[3:]
        elif word[:2] == 'MC':
            word = 'MK' + word[2:]

        # Rule 2 (FL3): scan right to left; after dropping the second letter
        # of a pair, the same position is re-examined because a new pair may
        # have formed there.
        pos = len(word) - 2
        while pos > -1:
            if word[pos : pos + 2] in self._dolby_fl3_pairs:
                word = word[: pos + 1] + word[pos + 2 :]
            else:
                pos -= 1

        # Rule 3 (FL4)
        # Although the rule indicates "after the first letter", the test cases
        # make it clear that these apply to the first letter also.
        word = word.replace('X', 'KS')
        word = word.replace('CE', 'SE')
        word = word.replace('CI', 'SI')
        word = word.replace('CY', 'SI')

        # not in the rule set, but they seem to have intended it
        word = word.replace('TCH', 'CH')

        # a non-initial CH that does not follow a vowel becomes SH
        pos = word.find('CH', 1)
        while pos != -1:
            if word[pos - 1 : pos] not in vowels:
                word = word[:pos] + 'S' + word[pos + 1 :]
            pos = word.find('CH', pos + 1)

        word = word.replace('C', 'K')
        word = word.replace('Z', 'S')

        word = word.replace('WR', 'R')
        word = word.replace('DG', 'G')
        word = word.replace('QU', 'K')
        word = word.replace('T', 'D')
        word = word.replace('PH', 'F')

        # Rule 4 (FL5): drop the letter before a K unless that letter is a
        # vowel, L, N, or R. The pos > 1 guard leaves a K at index 0 or 1
        # untouched.
        kept_before_k = vowels | {'L', 'N', 'R'}
        pos = word.find('K', 0)
        while pos != -1:
            if pos > 1 and word[pos - 1 : pos] not in kept_before_k:
                word = word[: pos - 1] + word[pos:]
                pos -= 1  # the K itself moved one position to the left
            pos = word.find('K', pos + 1)

        return word

    def _dolby_reduce(self, word, fixed_length):
        """Apply Rules FL6-FL9 (final E, repeats, PF/GH, V) to word.

        Parameters
        ----------
        word : str
            The word as returned by the respelling stage
        fixed_length : bool
            Whether the fixed-length-only rules (FL6 and FL9) are active

        Returns
        -------
        str
            The reduced word

        """
        # Rule FL6
        if fixed_length and word[-1:] == 'E':
            word = word[:-1]

        # Rule 5 (FL7)
        word = self._delete_consecutive_repeats(word)

        # Rule 6 (FL8)
        if word[:2] == 'PF':
            word = word[1:]
        if word[-2:] == 'PF':
            word = word[:-1]
        elif word[-2:] == 'GH':
            if word[-3:-2] in self._uc_vy_set:
                word = word[:-2] + 'F'
            else:
                word = word[:-2] + 'G'
        word = word.replace('GH', '')

        # Rule FL9
        if fixed_length:
            word = word.replace('V', 'F')

        return word

    def _dolby_abbreviate(self, word, fixed_length, keep_vowels, vowel_char):
        """Apply Rules 7-9 (FL10-FL12): mark vowels and drop W and H.

        Parameters
        ----------
        word : str
            The word as returned by the reduction stage
        fixed_length : bool
            If True, the first two vowels are marked rather than only the
            first one
        keep_vowels : bool
            If True, every vowel is marked
        vowel_char : str
            The vowel marker character

        Returns
        -------
        str
            The abbreviated code, not yet fitted to any length

        """
        vowels = self._uc_vy_set
        # number of leading vowels that still get a marker
        remaining = 2 if fixed_length else 1
        parts = []
        for pos, char in enumerate(word):
            if char in vowels:
                if remaining or keep_vowels:
                    parts.append(vowel_char)
                    remaining -= 1
            elif pos == 0 or char not in {'W', 'H'}:
                # W and H are only retained in initial position
                parts.append(char)
        return ''.join(parts)

    @staticmethod
    def _dolby_fit(code, max_length, keep_vowels, vowel_char):
        """Apply Rules FL13-FL16: fit code to exactly max_length characters.

        Parameters
        ----------
        code : str
            The abbreviated code
        max_length : int
            The target length (must be greater than 0)
        keep_vowels : bool
            If True, the code is simply truncated; otherwise vowel markers
            are removed first, from the right, to shorten the code
        vowel_char : str
            The vowel marker character

        Returns
        -------
        str
            The code, truncated or padded with spaces to max_length

        """
        # Rule FL13
        if len(code) > max_length and code[-1:] == 'S':
            code = code[:-1]

        if keep_vowels:
            code = code[:max_length]
        else:
            # Rule FL14
            code = code[: max_length + 2]
            # Rule FL15: each pass keeps one marker fewer and truncates one
            # character shorter, until the code fits.
            while len(code) > max_length:
                vowels = len(code) - max_length
                excess = vowels - 1
                kept = []
                for char in code:
                    if char != vowel_char:
                        kept.append(char)
                    elif vowels:
                        kept.append(char)
                        vowels -= 1
                code = ''.join(kept)[: max_length + excess]

        # Rule FL16
        return code + ' ' * (max_length - len(code))
247
248
249 1
def dolby(word, max_length=-1, keep_vowels=False, vowel_char='*'):
    r"""Return the Dolby Code of a name.

    Convenience function that builds a :py:class:`Dolby` instance and
    delegates to :py:meth:`Dolby.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        Maximum length of the returned Dolby code -- a value greater than 0
        also activates the fixed-length code mode
    keep_vowels : bool
        If True, retains all vowel markers
    vowel_char : str
        The vowel marker character (default to \*)

    Returns
    -------
    str
        The Dolby Code

    Examples
    --------
    >>> dolby('Hansen')
    'H*NSN'
    >>> dolby('Larsen')
    'L*RSN'
    >>> dolby('Aagaard')
    '*GR'
    >>> dolby('Braaten')
    'BR*DN'
    >>> dolby('Sandvik')
    'S*NVK'
    >>> dolby('Hansen', max_length=6)
    'H*NS*N'
    >>> dolby('Larsen', max_length=6)
    'L*RS*N'
    >>> dolby('Aagaard', max_length=6)
    '*G*R  '
    >>> dolby('Braaten', max_length=6)
    'BR*D*N'
    >>> dolby('Sandvik', max_length=6)
    'S*NF*K'

    >>> dolby('Smith')
    'SM*D'
    >>> dolby('Waters')
    'W*DRS'
    >>> dolby('James')
    'J*MS'
    >>> dolby('Schmidt')
    'SM*D'
    >>> dolby('Ashcroft')
    '*SKRFD'
    >>> dolby('Smith', max_length=6)
    'SM*D  '
    >>> dolby('Waters', max_length=6)
    'W*D*RS'
    >>> dolby('James', max_length=6)
    'J*M*S '
    >>> dolby('Schmidt', max_length=6)
    'SM*D  '
    >>> dolby('Ashcroft', max_length=6)
    '*SKRFD'

    """
    encoder = Dolby()
    return encoder.encode(
        word,
        max_length=max_length,
        keep_vowels=keep_vowels,
        vowel_char=vowel_char,
    )
317
318
319
if __name__ == '__main__':
    # Run the docstring examples of this module when executed as a script.
    from doctest import testmod

    testmod()
323