abydos.phonetic._Dolby - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.phonetic._Dolby A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	309
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	111
dl	0
loc	309
ccs	90
cts	90
cp	1
rs	9.76
c	0
b	0
f	0
wmc	33

1 Method

Rating	Name	Duplication	Size	Complexity
F	Dolby.encode()	0	193	32

1 Function

Rating	Name	Duplication	Size	Complexity
A	dolby()	0	61	1

# -*- coding: utf-8 -*-


# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._Dolby.

Dolby Code
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type

from ._Phonetic import Phonetic

# Names exported by a star-import of this module: the encoder class and its
# function-style wrapper.
__all__ = ['Dolby', 'dolby']


class Dolby(Phonetic):
    """Dolby Code phonetic encoder.

    Implements the scheme described in "A Spelling Equivalent Abbreviation
    Algorithm For Personal Names"; see :cite:`Dolby:1970` and
    :cite:`Cunningham:1969`.
    """

    def encode(self, word, max_length=-1, keep_vowels=False, vowel_char='*'):
        r"""Compute the Dolby Code for a name.

        Args:
            word (str): The name to encode
            max_length (int): When greater than 0, switches on fixed-length
                mode: the code is cut down or right-padded with spaces to
                this length. Any other value yields a variable-length code.
            keep_vowels (bool): If True, a marker is emitted for every vowel
                rather than only the leading one(s)
            vowel_char (str): The character written in place of a vowel
                (defaults to \*)

        Returns:
            str: The Dolby Code

        Examples:
            >>> pe = Dolby()
            >>> pe.encode('Hansen')
            'H*NSN'
            >>> pe.encode('Larsen')
            'L*RSN'
            >>> pe.encode('Aagaard')
            '*GR'
            >>> pe.encode('Braaten')
            'BR*DN'
            >>> pe.encode('Sandvik')
            'S*NVK'
            >>> pe.encode('Hansen', max_length=6)
            'H*NS*N'
            >>> pe.encode('Larsen', max_length=6)
            'L*RS*N'
            >>> pe.encode('Aagaard', max_length=6)
            '*G*R  '
            >>> pe.encode('Braaten', max_length=6)
            'BR*D*N'
            >>> pe.encode('Sandvik', max_length=6)
            'S*NF*K'

            >>> pe.encode('Smith')
            'SM*D'
            >>> pe.encode('Waters')
            'W*DRS'
            >>> pe.encode('James')
            'J*MS'
            >>> pe.encode('Schmidt')
            'SM*D'
            >>> pe.encode('Ashcroft')
            '*SKRFD'
            >>> pe.encode('Smith', max_length=6)
            'SM*D  '
            >>> pe.encode('Waters', max_length=6)
            'W*D*RS'
            >>> pe.encode('James', max_length=6)
            'J*M*S '
            >>> pe.encode('Schmidt', max_length=6)
            'SM*D  '
            >>> pe.encode('Ashcroft', max_length=6)
            '*SKRFD'

        """
        # Upper-case and NFKD-decompose the input, expand the sharp s, then
        # keep only the characters that appear in self._uc_set.
        name = unicode_normalize('NFKD', text_type(word.upper()))
        name = name.replace('ß', 'SS')
        name = ''.join([ch for ch in name if ch in self._uc_set])
        vowels_y = self._uc_vy_set

        # Rule 1 (FL2): leading MCG/MAG/MAC/MC all become MK.
        if name.startswith(('MCG', 'MAG', 'MAC')):
            name = 'MK' + name[3:]
        elif name.startswith('MC'):
            name = 'MK' + name[2:]

        # Rule 2 (FL3): scanning right to left, drop the second letter of each
        # listed pair. After a deletion the same index is examined again,
        # because a new pair may have formed there.
        shortened_pairs = {
            'DT', 'LD', 'ND', 'NT', 'RC', 'RD', 'RT', 'SC', 'SK', 'ST',
        }
        idx = len(name) - 2
        while idx >= 0:
            if name[idx : idx + 2] in shortened_pairs:
                name = name[: idx + 1] + name[idx + 2 :]
            else:
                idx -= 1

        # Rule 3 (FL4)
        # The rule text says "after the first letter", but the test cases
        # show the substitutions are meant for the first letter as well.
        # The final TCH -> CH entry is not in the published rule set, though
        # it appears to have been intended. Order matters here.
        for old, new in (
            ('X', 'KS'),
            ('CE', 'SE'),
            ('CI', 'SI'),
            ('CY', 'SI'),
            ('TCH', 'CH'),
        ):
            name = name.replace(old, new)

        # A non-initial CH that does not follow a vowel/Y letter becomes SH.
        idx = name.find('CH', 1)
        while idx >= 0:
            if name[idx - 1] not in vowels_y:
                name = name[:idx] + 'S' + name[idx + 1 :]
            idx = name.find('CH', idx + 1)

        for old, new in (
            ('C', 'K'),
            ('Z', 'S'),
            ('WR', 'R'),
            ('DG', 'G'),
            ('QU', 'K'),
            ('T', 'D'),
            ('PH', 'F'),
        ):
            name = name.replace(old, new)

        # Rule 4 (FL5)
        # The rule text says "after the first letter", but the test cases
        # show it is meant for the first letter as well.
        # For a K at index 2 or later, remove the letter in front of it unless
        # that letter is a vowel/Y letter or one of L, N, R.
        idx = name.find('K')
        while idx >= 0:
            if idx > 1:
                before = name[idx - 1]
                if before not in vowels_y and before not in {'L', 'N', 'R'}:
                    name = name[: idx - 1] + name[idx:]
                    idx -= 1
            idx = name.find('K', idx + 1)

        fixed_length = max_length > 0

        # Rule FL6: fixed-length mode drops a trailing E.
        if fixed_length and name.endswith('E'):
            name = name[:-1]

        # Rule 5 (FL7)
        name = self._delete_consecutive_repeats(name)

        # Rule 6 (FL8): PF at either end and GH anywhere.
        if name.startswith('PF'):
            name = name[1:]
        if name.endswith('PF'):
            name = name[:-1]
        elif name.endswith('GH'):
            # The slice (not an index) keeps a bare 'GH' from raising.
            ending = 'F' if name[-3:-2] in vowels_y else 'G'
            name = name[:-2] + ending
        name = name.replace('GH', '')

        # Rule FL9: fixed-length mode merges V into F.
        if fixed_length:
            name = name.replace('V', 'F')

        # Rules 7-9 (FL10-FL12): vowel/Y letters turn into markers (only the
        # first one, or first two in fixed-length mode, unless keep_vowels is
        # set), and W/H are dropped everywhere except at the start.
        markers_left = 2 if fixed_length else 1
        pieces = []
        for idx, ch in enumerate(name):
            if ch in vowels_y:
                if keep_vowels or markers_left:
                    pieces.append(vowel_char)
                    markers_left -= 1
            elif idx == 0 or ch not in {'W', 'H'}:
                pieces.append(ch)
        code = ''.join(pieces)

        if not fixed_length:
            return code

        # Rule FL13: an over-long code loses a trailing S.
        if len(code) > max_length and code.endswith('S'):
            code = code[:-1]

        if keep_vowels:
            code = code[:max_length]
        else:
            # Rule FL14: allow at most two characters of overshoot.
            code = code[: max_length + 2]
            # Rule FL15: each pass keeps only as many markers as there are
            # overshooting characters and trims one character of overshoot.
            while len(code) > max_length:
                allowance = len(code) - max_length
                overshoot = allowance - 1
                kept = []
                for ch in code:
                    if ch != vowel_char:
                        kept.append(ch)
                    elif allowance:
                        kept.append(ch)
                        allowance -= 1
                code = ''.join(kept)[: max_length + overshoot]

        # Rule FL16: right-pad short codes with spaces.
        return code + ' ' * (max_length - len(code))


def dolby(word, max_length=-1, keep_vowels=False, vowel_char='*'):
    r"""Compute the Dolby Code for a name.

    Function-style shortcut that builds a :py:class:`Dolby` instance and
    delegates to :py:meth:`Dolby.encode`.

    Args:
        word (str): The name to encode
        max_length (int): When greater than 0, switches on fixed-length
            mode and sets the length of the returned code
        keep_vowels (bool): If True, a marker is emitted for every vowel
        vowel_char (str): The character written in place of a vowel
            (defaults to \*)

    Returns:
        str: The Dolby Code

    Examples:
        >>> dolby('Hansen')
        'H*NSN'
        >>> dolby('Larsen')
        'L*RSN'
        >>> dolby('Aagaard')
        '*GR'
        >>> dolby('Braaten')
        'BR*DN'
        >>> dolby('Sandvik')
        'S*NVK'
        >>> dolby('Hansen', max_length=6)
        'H*NS*N'
        >>> dolby('Larsen', max_length=6)
        'L*RS*N'
        >>> dolby('Aagaard', max_length=6)
        '*G*R  '
        >>> dolby('Braaten', max_length=6)
        'BR*D*N'
        >>> dolby('Sandvik', max_length=6)
        'S*NF*K'

        >>> dolby('Smith')
        'SM*D'
        >>> dolby('Waters')
        'W*DRS'
        >>> dolby('James')
        'J*MS'
        >>> dolby('Schmidt')
        'SM*D'
        >>> dolby('Ashcroft')
        '*SKRFD'
        >>> dolby('Smith', max_length=6)
        'SM*D  '
        >>> dolby('Waters', max_length=6)
        'W*D*RS'
        >>> dolby('James', max_length=6)
        'J*M*S '
        >>> dolby('Schmidt', max_length=6)
        'SM*D  '
        >>> dolby('Ashcroft', max_length=6)
        '*SKRFD'

    """
    encoder = Dolby()
    return encoder.encode(
        word,
        max_length=max_length,
        keep_vowels=keep_vowels,
        vowel_char=vowel_char,
    )


if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
		0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_Dolby` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)\|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._Dolby.
20
21		Dolby Code
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34
35	1	from ._Phonetic import Phonetic
36
37	1	__all__ = ['Dolby', 'dolby']
38
39
40	1	class Dolby(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""Dolby Code.
42
43		This follows "A Spelling Equivalent Abbreviation Algorithm For Personal
44		Names" from :cite:`Dolby:1970` and :cite:`Cunningham:1969`.
45		"""
46
47	1	def encode(self, word, max_length=-1, keep_vowels=False, vowel_char='*'):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
48		r"""Return the Dolby Code of a name.
49
50		Args:
51		word (str): The word to transform
52		max_length (int): Maximum length of the returned Dolby code -- this
53		also activates the fixed-length code mode if it is greater than
54		0
55		keep_vowels (bool): If True, retains all vowel markers
56		vowel_char (str): The vowel marker character (default to \*)
57
58		Returns:
59		str: The Dolby Code
60
61		Examples:
62		>>> pe = Dolby()
63		>>> pe.encode('Hansen')
64		'H*NSN'
65		>>> pe.encode('Larsen')
66		'L*RSN'
67		>>> pe.encode('Aagaard')
68		'*GR'
69		>>> pe.encode('Braaten')
70		'BR*DN'
71		>>> pe.encode('Sandvik')
72		'S*NVK'
73		>>> pe.encode('Hansen', max_length=6)
74		'H*NS*N'
75		>>> pe.encode('Larsen', max_length=6)
76		'L*RS*N'
77		>>> pe.encode('Aagaard', max_length=6)
78		'*G*R  '
79		>>> pe.encode('Braaten', max_length=6)
80		'BR*D*N'
81		>>> pe.encode('Sandvik', max_length=6)
82		'S*NF*K'
83
84		>>> pe.encode('Smith')
85		'SM*D'
86		>>> pe.encode('Waters')
87		'W*DRS'
88		>>> pe.encode('James')
89		'J*MS'
90		>>> pe.encode('Schmidt')
91		'SM*D'
92		>>> pe.encode('Ashcroft')
93		'*SKRFD'
94		>>> pe.encode('Smith', max_length=6)
95		'SM*D  '
96		>>> pe.encode('Waters', max_length=6)
97		'W*D*RS'
98		>>> pe.encode('James', max_length=6)
99		'J*M*S '
100		>>> pe.encode('Schmidt', max_length=6)
101		'SM*D  '
102		>>> pe.encode('Ashcroft', max_length=6)
103		'*SKRFD'
104
105		"""
106		# uppercase, normalize, decompose, and filter non-A-Z out
107	1	word = unicode_normalize('NFKD', text_type(word.upper()))
108	1	word = word.replace('ß', 'SS')
109	1	word = ''.join(c for c in word if c in self._uc_set)
110
111		# Rule 1 (FL2)
112	1	if word[:3] in {'MCG', 'MAG', 'MAC'}:
113	1	word = 'MK' + word[3:]
114	1	elif word[:2] == 'MC':
115	1	word = 'MK' + word[2:]
116
117		# Rule 2 (FL3)
118	1	pos = len(word) - 2
119	1	while pos > -1:
120	1	if word[pos : pos + 2] in {
121		'DT',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122		'LD',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123		'ND',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124		'NT',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
125		'RC',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
126		'RD',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
127		'RT',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
128		'SC',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
129		'SK',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
130		'ST',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
131		}:
132	1	word = word[: pos + 1] + word[pos + 2 :]
133	1	pos += 1
134	1	pos -= 1
135
136		# Rule 3 (FL4)
137		# Although the rule indicates "after the first letter", the test cases
138		# make it clear that these apply to the first letter also.
139	1	word = word.replace('X', 'KS')
140	1	word = word.replace('CE', 'SE')
141	1	word = word.replace('CI', 'SI')
142	1	word = word.replace('CY', 'SI')
143
144		# not in the rule set, but they seem to have intended it
145	1	word = word.replace('TCH', 'CH')
146
147	1	pos = word.find('CH', 1)
148	1	while pos != -1:
149	1	if word[pos - 1 : pos] not in self._uc_vy_set:
150	1	word = word[:pos] + 'S' + word[pos + 1 :]
151	1	pos = word.find('CH', pos + 1)
152
153	1	word = word.replace('C', 'K')
154	1	word = word.replace('Z', 'S')
155
156	1	word = word.replace('WR', 'R')
157	1	word = word.replace('DG', 'G')
158	1	word = word.replace('QU', 'K')
159	1	word = word.replace('T', 'D')
160	1	word = word.replace('PH', 'F')
161
162		# Rule 4 (FL5)
163		# Although the rule indicates "after the first letter", the test cases
164		# make it clear that these apply to the first letter also.
165	1	pos = word.find('K', 0)
166	1	while pos != -1:
167	1	if pos > 1 and word[pos - 1 : pos] not in self._uc_vy_set | {
168		'L',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
169		'N',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
170		'R',
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
171		}:
172	1	word = word[: pos - 1] + word[pos:]
173	1	pos -= 1
174	1	pos = word.find('K', pos + 1)
175
176		# Rule FL6
177	1	if max_length > 0 and word[-1:] == 'E':
178	1	word = word[:-1]
179
180		# Rule 5 (FL7)
181	1	word = self._delete_consecutive_repeats(word)
182
183		# Rule 6 (FL8)
184	1	if word[:2] == 'PF':
185	1	word = word[1:]
186	1	if word[-2:] == 'PF':
187	1	word = word[:-1]
188	1	elif word[-2:] == 'GH':
189	1	if word[-3:-2] in self._uc_vy_set:
190	1	word = word[:-2] + 'F'
191		else:
192	1	word = word[:-2] + 'G'
193	1	word = word.replace('GH', '')
194
195		# Rule FL9
196	1	if max_length > 0:
197	1	word = word.replace('V', 'F')
198
199		# Rules 7-9 (FL10-FL12)
200	1	first = 1 + (1 if max_length > 0 else 0)
201	1	code = ''
202	1	for pos, char in enumerate(word):
203	1	if char in self._uc_vy_set:
204	1	if first or keep_vowels:
205	1	code += vowel_char
206	1	first -= 1
207	1	elif pos > 0 and char in {'W', 'H'}:
208	1	continue
209		else:
210	1	code += char
211
212	1	if max_length > 0:
		0 ignored issues – show unused-code introduced 2018-10-20 00:45 UTC by Report Bug Copy Issue Report Too many nested blocks (6/5) Loading history...
213		# Rule FL13
214	1	if len(code) > max_length and code[-1:] == 'S':
215	1	code = code[:-1]
216	1	if keep_vowels:
217	1	code = code[:max_length]
218		else:
219		# Rule FL14
220	1	code = code[: max_length + 2]
221		# Rule FL15
222	1	while len(code) > max_length:
223	1	vowels = len(code) - max_length
224	1	excess = vowels - 1
225	1	word = code
226	1	code = ''
227	1	for char in word:
228	1	if char == vowel_char:
229	1	if vowels:
230	1	code += char
231	1	vowels -= 1
232		else:
233	1	code += char
234	1	code = code[: max_length + excess]
235
236		# Rule FL16
237	1	code += ' ' * (max_length - len(code))
238
239	1	return code
240
241
242	1	def dolby(word, max_length=-1, keep_vowels=False, vowel_char='*'):
243		r"""Return the Dolby Code of a name.
244
245		This is a wrapper for :py:meth:`Dolby.encode`.
246
247		Args:
248		word (str): The word to transform
249		max_length (int): Maximum length of the returned Dolby code -- this
250		also activates the fixed-length code mode if it is greater than
251		0
252		keep_vowels (bool): If True, retains all vowel markers
253		vowel_char (str): The vowel marker character (default to \*)
254
255		Returns:
256		str: The Dolby Code
257
258		Examples:
259		>>> dolby('Hansen')
260		'H*NSN'
261		>>> dolby('Larsen')
262		'L*RSN'
263		>>> dolby('Aagaard')
264		'*GR'
265		>>> dolby('Braaten')
266		'BR*DN'
267		>>> dolby('Sandvik')
268		'S*NVK'
269		>>> dolby('Hansen', max_length=6)
270		'H*NS*N'
271		>>> dolby('Larsen', max_length=6)
272		'L*RS*N'
273		>>> dolby('Aagaard', max_length=6)
274		'*G*R  '
275		>>> dolby('Braaten', max_length=6)
276		'BR*D*N'
277		>>> dolby('Sandvik', max_length=6)
278		'S*NF*K'
279
280		>>> dolby('Smith')
281		'SM*D'
282		>>> dolby('Waters')
283		'W*DRS'
284		>>> dolby('James')
285		'J*MS'
286		>>> dolby('Schmidt')
287		'SM*D'
288		>>> dolby('Ashcroft')
289		'*SKRFD'
290		>>> dolby('Smith', max_length=6)
291		'SM*D  '
292		>>> dolby('Waters', max_length=6)
293		'W*D*RS'
294		>>> dolby('James', max_length=6)
295		'J*M*S '
296		>>> dolby('Schmidt', max_length=6)
297		'SM*D  '
298		>>> dolby('Ashcroft', max_length=6)
299		'*SKRFD'
300
301		"""
302	1	return Dolby().encode(word, max_length, keep_vowels, vowel_char)
303
304
305		if __name__ == '__main__':
306		import doctest
307
308		doctest.testmod()
309

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._Dolby A

Complexity

Size/Duplication

Test Coverage

Importance

1 Method

1 Function

Duplication Side-by-Side

Filter issues like