Completed
Pull Request — master (#141)
by Chris
11:42
created

abydos.phonetic._Dolby   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 309
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 111
dl 0
loc 309
ccs 90
cts 90
cp 1
rs 9.76
c 0
b 0
f 0
wmc 33

1 Method

Rating   Name   Duplication   Size   Complexity  
F Dolby.encode() 0 193 32

1 Function

Rating   Name   Duplication   Size   Complexity  
A dolby() 0 61 1
1
# -*- coding: utf-8 -*-
0 ignored issues
show
Coding Style Naming introduced by
The name _Dolby does not conform to the module naming conventions ((([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$).

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._Dolby.
20
21
Dolby Code
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._Phonetic import Phonetic
36
37 1
__all__ = ['Dolby', 'dolby']
38
39
40 1
class Dolby(Phonetic):
    """Dolby Code.

    This follows "A Spelling Equivalent Abbreviation Algorithm For Personal
    Names" from :cite:`Dolby:1970` and :cite:`Cunningham:1969`.

    The encoder relies on ``_uc_set``, ``_uc_vy_set`` and
    ``_delete_consecutive_repeats``, none of which are defined here; they
    are expected to be provided by the :py:class:`Phonetic` base class.
    """

    # Letter pairs whose second letter is deleted by Rule 2 (FL3).
    _dolby_reduced_pairs = frozenset(
        ('DT', 'LD', 'ND', 'NT', 'RC', 'RD', 'RT', 'SC', 'SK', 'ST')
    )

    # Consonants that, like the members of ``_uc_vy_set``, are retained
    # when they directly precede a K in Rule 4 (FL5).
    _dolby_kept_before_k = frozenset(('L', 'N', 'R'))

    def encode(self, word, max_length=-1, keep_vowels=False, vowel_char='*'):
        r"""Return the Dolby Code of a name.

        Args:
            word (str): The word to transform
            max_length (int): Maximum length of the returned Dolby code -- this
                also activates the fixed-length code mode if it is greater than
                0, in which case the code is truncated or right-padded with
                spaces to exactly this length
            keep_vowels (bool): If True, retains all vowel markers
            vowel_char (str): The vowel marker character (default to \*)

        Returns:
            str: The Dolby Code

        Examples:
            >>> pe = Dolby()
            >>> pe.encode('Hansen')
            'H*NSN'
            >>> pe.encode('Larsen')
            'L*RSN'
            >>> pe.encode('Aagaard')
            '*GR'
            >>> pe.encode('Braaten')
            'BR*DN'
            >>> pe.encode('Sandvik')
            'S*NVK'
            >>> pe.encode('Hansen', max_length=6)
            'H*NS*N'
            >>> pe.encode('Larsen', max_length=6)
            'L*RS*N'
            >>> pe.encode('Aagaard', max_length=6)
            '*G*R  '
            >>> pe.encode('Braaten', max_length=6)
            'BR*D*N'
            >>> pe.encode('Sandvik', max_length=6)
            'S*NF*K'

            >>> pe.encode('Smith')
            'SM*D'
            >>> pe.encode('Waters')
            'W*DRS'
            >>> pe.encode('James')
            'J*MS'
            >>> pe.encode('Schmidt')
            'SM*D'
            >>> pe.encode('Ashcroft')
            '*SKRFD'
            >>> pe.encode('Smith', max_length=6)
            'SM*D  '
            >>> pe.encode('Waters', max_length=6)
            'W*D*RS'
            >>> pe.encode('James', max_length=6)
            'J*M*S '
            >>> pe.encode('Schmidt', max_length=6)
            'SM*D  '
            >>> pe.encode('Ashcroft', max_length=6)
            '*SKRFD'

        """
        # A positive max_length switches on the fixed-length variant, which
        # enables the extra FL-only rules below.
        fixed_length = max_length > 0

        word = self._normalize_name(word)
        word = self._reduce_prefix_and_pairs(word)
        word = self._respell_consonants(word)
        word = self._drop_letters_before_k(word)

        # Rule FL6
        if fixed_length and word[-1:] == 'E':
            word = word[:-1]

        # Rule 5 (FL7)
        word = self._delete_consecutive_repeats(word)

        # Rule 6 (FL8)
        word = self._simplify_pf_and_gh(word)

        # Rule FL9
        if fixed_length:
            word = word.replace('V', 'F')

        # Rules 7-9 (FL10-FL12): fixed-length mode keeps up to two vowel
        # markers at this stage, variable-length mode keeps one.
        code = self._mark_vowels(
            word, 2 if fixed_length else 1, keep_vowels, vowel_char
        )

        if fixed_length:
            code = self._fit_to_length(
                code, max_length, keep_vowels, vowel_char
            )
        return code

    def _normalize_name(self, word):
        """Uppercase, NFKD-decompose, and drop characters not in _uc_set."""
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')
        return ''.join(c for c in word if c in self._uc_set)

    def _reduce_prefix_and_pairs(self, word):
        """Apply Rule 1 (FL2) and Rule 2 (FL3)."""
        # Rule 1 (FL2)
        if word[:3] in {'MCG', 'MAG', 'MAC'}:
            word = 'MK' + word[3:]
        elif word[:2] == 'MC':
            word = 'MK' + word[2:]

        # Rule 2 (FL3): scan right to left. After a deletion the same
        # position is examined again, because the shortened word may have
        # formed a new reducible pair there.
        pos = len(word) - 2
        while pos > -1:
            if word[pos : pos + 2] in self._dolby_reduced_pairs:
                word = word[: pos + 1] + word[pos + 2 :]
            else:
                pos -= 1
        return word

    def _respell_consonants(self, word):
        """Apply the Rule 3 (FL4) respellings."""
        # Although the rule indicates "after the first letter", the test cases
        # make it clear that these apply to the first letter also.
        for old, new in (
            ('X', 'KS'),
            ('CE', 'SE'),
            ('CI', 'SI'),
            ('CY', 'SI'),
            # not in the rule set, but they seem to have intended it
            ('TCH', 'CH'),
        ):
            word = word.replace(old, new)

        # A non-initial CH whose preceding letter is not in _uc_vy_set has
        # its C turned into S; any C left over becomes K below.
        pos = word.find('CH', 1)
        while pos != -1:
            if word[pos - 1 : pos] not in self._uc_vy_set:
                word = word[:pos] + 'S' + word[pos + 1 :]
            pos = word.find('CH', pos + 1)

        # The order matters: each replacement sees the output of the
        # previous ones.
        for old, new in (
            ('C', 'K'),
            ('Z', 'S'),
            ('WR', 'R'),
            ('DG', 'G'),
            ('QU', 'K'),
            ('T', 'D'),
            ('PH', 'F'),
        ):
            word = word.replace(old, new)
        return word

    def _drop_letters_before_k(self, word):
        """Apply Rule 4 (FL5): delete certain letters directly before K."""
        # Built once per call rather than once per loop iteration.
        kept = self._uc_vy_set | self._dolby_kept_before_k

        # The `pos > 1` guard means a K at index 0 or 1 is left alone, so
        # the first letter of the word is never deleted by this rule.
        # NOTE(review): the earlier comment here said this rule applies to
        # the first letter too, which the guard contradicts -- confirm
        # which behaviour is intended.
        pos = word.find('K', 0)
        while pos != -1:
            if pos > 1 and word[pos - 1 : pos] not in kept:
                word = word[: pos - 1] + word[pos:]
                pos -= 1
            pos = word.find('K', pos + 1)
        return word

    def _simplify_pf_and_gh(self, word):
        """Apply Rule 6 (FL8): reduce PF and GH."""
        if word[:2] == 'PF':
            word = word[1:]
        if word[-2:] == 'PF':
            word = word[:-1]
        elif word[-2:] == 'GH':
            # Final GH becomes F after a member of _uc_vy_set, else G.
            if word[-3:-2] in self._uc_vy_set:
                word = word[:-2] + 'F'
            else:
                word = word[:-2] + 'G'
        # Any GH still present (i.e. non-final) is removed outright.
        return word.replace('GH', '')

    def _mark_vowels(self, word, max_markers, keep_vowels, vowel_char):
        """Apply Rules 7-9 (FL10-FL12): mark vowels and drop W/H."""
        pieces = []
        remaining = max_markers
        for pos, char in enumerate(word):
            if char in self._uc_vy_set:
                # Emit a marker while the quota lasts, or always when
                # keep_vowels is set; other vowels are dropped.
                if remaining or keep_vowels:
                    pieces.append(vowel_char)
                    remaining -= 1
            elif pos > 0 and char in {'W', 'H'}:
                # Non-initial W and H are dropped.
                continue
            else:
                pieces.append(char)
        return ''.join(pieces)

    def _fit_to_length(self, code, max_length, keep_vowels, vowel_char):
        """Apply Rules FL13-FL16: truncate or pad code to max_length."""
        # Rule FL13
        if len(code) > max_length and code[-1:] == 'S':
            code = code[:-1]

        if keep_vowels:
            code = code[:max_length]
        else:
            # Rule FL14
            code = code[: max_length + 2]
            # Rule FL15: each pass keeps only as many leading vowel
            # markers as there are surplus characters, then cuts one
            # character off the allowed length, so the loop ends after
            # at most two passes.
            while len(code) > max_length:
                surplus = len(code) - max_length
                code = self._keep_leading_markers(code, surplus, vowel_char)
                code = code[: max_length + surplus - 1]

        # Rule FL16
        return code + ' ' * (max_length - len(code))

    def _keep_leading_markers(self, code, limit, vowel_char):
        """Return code with only its first `limit` vowel markers retained."""
        pieces = []
        for char in code:
            if char == vowel_char:
                if limit:
                    pieces.append(char)
                    limit -= 1
            else:
                pieces.append(char)
        return ''.join(pieces)
240
241
242 1
def dolby(word, max_length=-1, keep_vowels=False, vowel_char='*'):
    r"""Encode a name with the Dolby Code.

    Convenience function that creates a :py:class:`Dolby` instance and
    delegates to :py:meth:`Dolby.encode`, forwarding every argument
    unchanged.

    Args:
        word (str): The word to transform
        max_length (int): Maximum length of the returned Dolby code; a value
            greater than 0 also activates the fixed-length code mode
        keep_vowels (bool): If True, retains all vowel markers
        vowel_char (str): The vowel marker character (default to \*)

    Returns:
        str: The Dolby Code

    Examples:
        >>> dolby('Hansen')
        'H*NSN'
        >>> dolby('Larsen')
        'L*RSN'
        >>> dolby('Aagaard')
        '*GR'
        >>> dolby('Braaten')
        'BR*DN'
        >>> dolby('Sandvik')
        'S*NVK'
        >>> dolby('Hansen', max_length=6)
        'H*NS*N'
        >>> dolby('Larsen', max_length=6)
        'L*RS*N'
        >>> dolby('Aagaard', max_length=6)
        '*G*R  '
        >>> dolby('Braaten', max_length=6)
        'BR*D*N'
        >>> dolby('Sandvik', max_length=6)
        'S*NF*K'

        >>> dolby('Smith')
        'SM*D'
        >>> dolby('Waters')
        'W*DRS'
        >>> dolby('James')
        'J*MS'
        >>> dolby('Schmidt')
        'SM*D'
        >>> dolby('Ashcroft')
        '*SKRFD'
        >>> dolby('Smith', max_length=6)
        'SM*D  '
        >>> dolby('Waters', max_length=6)
        'W*D*RS'
        >>> dolby('James', max_length=6)
        'J*M*S '
        >>> dolby('Schmidt', max_length=6)
        'SM*D  '
        >>> dolby('Ashcroft', max_length=6)
        '*SKRFD'

    """
    encoder = Dolby()
    return encoder.encode(
        word,
        max_length=max_length,
        keep_vowels=keep_vowels,
        vowel_char=vowel_char,
    )
303
304
305
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()
309