abydos.phonetic._Koelner.Koelner.encode_alpha() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.phonetic._Koelner.Koelner.encode_alpha() A

↳ Parent: abydos.phonetic._Koelner

Complexity

Conditions

Size

Total Lines	22
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	2
dl	0
loc	22
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-


# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._Koelner.

Kölner Phonetik
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type
from six.moves import range

from ._Phonetic import Phonetic

# Names exported by ``from <this module> import *``: the encoder class and
# its three function-style wrappers.
__all__ = [
    'Koelner',
    'koelner_phonetik',
    'koelner_phonetik_alpha',
    'koelner_phonetik_num_to_alpha',
]


class Koelner(Phonetic):
    """Kölner Phonetik.

    Based on the algorithm defined by :cite:`Postel:1969`.
    """

    # Letters that encode() maps to the code '0'.
    _uc_v_set = set('AEIOUJY')

    # Translation table for str.translate(): code point of each digit
    # '0'..'8' -> the letter that represents it in the alphabetic code.
    _num_trans = dict(zip((ord(_) for _ in '012345678'), 'APTFKLNRS'))

    # Digits that _to_alpha() keeps; every other character is discarded.
    _num_set = set('012345678')

    def encode(self, word):
        """Return the Kölner Phonetik (numeric output) code for a word.

        While the output code is numeric, it is still a str because 0s can lead
        the code.

        Args:
            word (str): The word to transform

        Returns:
            str: The Kölner Phonetik value as a numeric string (empty if no
                character of the word survives filtering)

        Example:
            >>> pe = Koelner()
            >>> pe.encode('Christopher')
            '478237'
            >>> pe.encode('Niall')
            '65'
            >>> pe.encode('Smith')
            '862'
            >>> pe.encode('Schmidt')
            '862'
            >>> pe.encode('Müller')
            '657'
            >>> pe.encode('Zimmermann')
            '86766'

        """

        def _after(word, pos, letters):
            """Return True if word[pos] follows one of the supplied letters.

            Args:
                word (str): The word to check
                pos (int): Position within word to check
                letters (set of str): Letters to confirm precede word[pos]

            Returns:
                bool: True if word[pos] follows a value in letters

            """
            return pos > 0 and word[pos - 1] in letters

        def _before(word, pos, letters):
            """Return True if word[pos] precedes one of the supplied letters.

            Args:
                word (str): The word to check
                pos (int): Position within word to check
                letters (set of str): Letters to confirm follow word[pos]

            Returns:
                bool: True if word[pos] precedes a value in letters

            """
            return pos + 1 < len(word) and word[pos + 1] in letters

        # Normalise: upper-case, NFKD-normalise, expand German special
        # letters, then keep only characters found in self._uc_set.
        # NOTE(review): _uc_set is not defined in this class; presumably it
        # is inherited from Phonetic -- confirm.
        # NOTE(review): the umlaut replacements run *after* NFKD
        # normalisation; confirm they can still match at that point.
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')
        word = word.replace('Ä', 'AE')
        word = word.replace('Ö', 'OE')
        word = word.replace('Ü', 'UE')
        word = ''.join(c for c in word if c in self._uc_set)

        # Nothing to convert, return base case
        if not word:
            return ''

        # One code fragment per input letter, chosen from the letter itself
        # and (for P, D/T, C and X) its immediate neighbour.
        codes = []
        for i, char in enumerate(word):
            if char in self._uc_v_set:
                codes.append('0')
            elif char == 'B':
                codes.append('1')
            elif char == 'P':
                # 'PH' is coded like F/V/W.
                codes.append('3' if _before(word, i, {'H'}) else '1')
            elif char in {'D', 'T'}:
                if _before(word, i, {'C', 'S', 'Z'}):
                    codes.append('8')
                else:
                    codes.append('2')
            elif char in {'F', 'V', 'W'}:
                codes.append('3')
            elif char in {'G', 'K', 'Q'}:
                codes.append('4')
            elif char == 'C':
                if _after(word, i, {'S', 'Z'}):
                    codes.append('8')
                elif i == 0:
                    # A word-initial C accepts a larger set of following
                    # letters (additionally L and R) for the code '4'.
                    if _before(
                        word, i, {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
                    ):
                        codes.append('4')
                    else:
                        codes.append('8')
                elif _before(word, i, {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}):
                    codes.append('4')
                else:
                    codes.append('8')
            elif char == 'X':
                # X contributes two digits unless it follows C, K or Q.
                if _after(word, i, {'C', 'K', 'Q'}):
                    codes.append('8')
                else:
                    codes.append('48')
            elif char == 'L':
                codes.append('5')
            elif char in {'M', 'N'}:
                codes.append('6')
            elif char == 'R':
                codes.append('7')
            elif char in {'S', 'Z'}:
                codes.append('8')
            # Any other letter (e.g. H) contributes nothing to the code.

        sdx = self._delete_consecutive_repeats(''.join(codes))

        # Drop every '0' except one in the leading position.
        if sdx:
            sdx = sdx[:1] + sdx[1:].replace('0', '')

        return sdx

    def _to_alpha(self, num):
        """Convert a Kölner Phonetik code from numeric to alphabetic.

        Characters of ``num`` other than the digits 0-8 are ignored.

        Args:
            num (str or int): A numeric Kölner Phonetik representation

        Returns:
            str: An alphabetic representation of the same word

        Examples:
            >>> pe = Koelner()
            >>> pe._to_alpha('862')
            'SNT'
            >>> pe._to_alpha('657')
            'NLR'
            >>> pe._to_alpha('86766')
            'SNRNN'

        """
        num = ''.join(c for c in text_type(num) if c in self._num_set)
        return num.translate(self._num_trans)

    def encode_alpha(self, word):
        """Return the Kölner Phonetik (alphabetic output) code for a word.

        The numeric code is computed with :py:meth:`encode` and converted
        with :py:meth:`_to_alpha` on this same instance, so subclasses that
        override either method are honoured.

        Args:
            word (str): The word to transform

        Returns:
            str: The Kölner Phonetik value as an alphabetic string

        Examples:
            >>> pe = Koelner()
            >>> pe.encode_alpha('Smith')
            'SNT'
            >>> pe.encode_alpha('Schmidt')
            'SNT'
            >>> pe.encode_alpha('Müller')
            'NLR'
            >>> pe.encode_alpha('Zimmermann')
            'SNRNN'

        """
        return self._to_alpha(self.encode(word))


def koelner_phonetik(word):
    """Compute the numeric Kölner Phonetik code of a word.

    Function-style shortcut: creates a fresh :py:class:`Koelner` encoder and
    delegates to :py:meth:`Koelner.encode`.

    Args:
        word (str): The word to transform

    Returns:
        str: The Kölner Phonetik value as a numeric string

    Example:
        >>> koelner_phonetik('Christopher')
        '478237'
        >>> koelner_phonetik('Niall')
        '65'
        >>> koelner_phonetik('Smith')
        '862'
        >>> koelner_phonetik('Schmidt')
        '862'
        >>> koelner_phonetik('Müller')
        '657'
        >>> koelner_phonetik('Zimmermann')
        '86766'

    """
    encoder = Koelner()
    numeric_code = encoder.encode(word)
    return numeric_code


def koelner_phonetik_num_to_alpha(num):
    """Translate a numeric Kölner Phonetik code into its alphabetic form.

    Function-style shortcut: creates a fresh :py:class:`Koelner` encoder and
    delegates to :py:meth:`Koelner._to_alpha`.

    Args:
        num (str or int): A numeric Kölner Phonetik representation

    Returns:
        str: An alphabetic representation of the same word

    Examples:
        >>> koelner_phonetik_num_to_alpha('862')
        'SNT'
        >>> koelner_phonetik_num_to_alpha('657')
        'NLR'
        >>> koelner_phonetik_num_to_alpha('86766')
        'SNRNN'

    """
    encoder = Koelner()
    alpha_code = encoder._to_alpha(num)
    return alpha_code
class MyParent:
    """Example base class holding one underscore-prefixed and one plain attribute."""

    def __init__(self):
        """Set ``_x`` to 1 and ``y`` to 2."""
        self._x = 1
        self.y = 2

class MyChild(MyParent):
    """Example subclass that reads the inherited ``_x`` attribute."""

    def some_method(self):
        """Return the value of ``self._x``."""
        # Ok, since accessed from a child class
        inherited_value = self._x
        return inherited_value

class AnotherClass:
    """Example unrelated class that reads ``_x`` from a foreign object."""

    def some_method(self, instance_of_my_child):
        """Return the ``_x`` attribute of ``instance_of_my_child``."""
        # Would be flagged as AnotherClass is not a child class of MyParent
        foreign_value = instance_of_my_child._x
        return foreign_value


def koelner_phonetik_alpha(word):
    """Compute the alphabetic Kölner Phonetik code of a word.

    Function-style shortcut: creates a fresh :py:class:`Koelner` encoder and
    delegates to :py:meth:`Koelner.encode_alpha`.

    Args:
        word (str): The word to transform

    Returns:
        str: The Kölner Phonetik value as an alphabetic string

    Examples:
        >>> koelner_phonetik_alpha('Smith')
        'SNT'
        >>> koelner_phonetik_alpha('Schmidt')
        'SNT'
        >>> koelner_phonetik_alpha('Müller')
        'NLR'
        >>> koelner_phonetik_alpha('Zimmermann')
        'SNRNN'

    """
    encoder = Koelner()
    alpha_code = encoder.encode_alpha(word)
    return alpha_code


if __name__ == '__main__':
    # When executed as a script, run the examples embedded in the docstrings.
    from doctest import testmod

    testmod()


1			# -- coding: utf-8 --
			0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_Koelner` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)\|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1		"""abydos.phonetic._Koelner.
20
21			Kölner Phonetik
22			"""
23
24	1		from __future__ import (
25			absolute_import,
26			division,
27			print_function,
28			unicode_literals,
29			)
30
31	1		from unicodedata import normalize as unicode_normalize
32
33	1		from six import text_type
34	1		from six.moves import range
35
36	1		from ._Phonetic import Phonetic
37
38	1		__all__ = [
39			'Koelner',
40			'koelner_phonetik',
41			'koelner_phonetik_alpha',
42			'koelner_phonetik_num_to_alpha',
43			]
44
45
46	1		class Koelner(Phonetic):
			0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
47			"""Kölner Phonetik.
48
49			Based on the algorithm defined by :cite:`Postel:1969`.
50			"""
51
52	1		_uc_v_set = set('AEIOUJY')
53
54	1		_num_trans = dict(zip((ord(_) for _ in '012345678'), 'APTFKLNRS'))
			0 ignored issues – show Comprehensibility Best Practice introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
55	1		_num_set = set('012345678')
56
57	1		def encode(self, word):
58			"""Return the Kölner Phonetik (numeric output) code for a word.
59
60			While the output code is numeric, it is still a str because 0s can lead
61			the code.
62
63			Args:
64			word (str): The word to transform
65
66			Returns:
67			str: The Kölner Phonetik value as a numeric string
68
69			Example:
70			>>> pe = Koelner()
71			>>> pe.encode('Christopher')
72			'478237'
73			>>> pe.encode('Niall')
74			'65'
75			>>> pe.encode('Smith')
76			'862'
77			>>> pe.encode('Schmidt')
78			'862'
79			>>> pe.encode('Müller')
80			'657'
81			>>> pe.encode('Zimmermann')
82			'86766'
83
84			"""
85
86	1		def _after(word, pos, letters):
87			"""Return True if word[pos] follows one of the supplied letters.
88
89			Args:
90			word (str): The word to check
91			pos (int): Position within word to check
92			letters (str): Letters to confirm precede word[pos]
93
94			Returns:
95			bool: True if word[pos] follows a value in letters
96
97			"""
98	1		return pos > 0 and word[pos - 1] in letters
99
100	1		def _before(word, pos, letters):
101			"""Return True if word[pos] precedes one of the supplied letters.
102
103			Args:
104			word (str): The word to check
105			pos (int): Position within word to check
106			letters (str): Letters to confirm follow word[pos]
107
108			Returns:
109			bool: True if word[pos] precedes a value in letters
110
111			"""
112	1		return pos + 1 < len(word) and word[pos + 1] in letters
113
114	1		sdx = ''
115
116	1		word = unicode_normalize('NFKD', text_type(word.upper()))
117	1		word = word.replace('ß', 'SS')
118
119	1		word = word.replace('Ä', 'AE')
120	1		word = word.replace('Ö', 'OE')
121	1		word = word.replace('Ü', 'UE')
122	1		word = ''.join(c for c in word if c in self._uc_set)
123
124			# Nothing to convert, return base case
125	1		if not word:
126	1		return sdx
127
128	1		for i in range(len(word)):
			0 ignored issues – show unused-code introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report Consider using enumerate instead of iterating with range and len Loading history...
129	1	View Code Duplication	if word[i] in self._uc_v_set:
			0 ignored issues – show Duplication introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
130	1		sdx += '0'
131	1		elif word[i] == 'B':
132	1		sdx += '1'
133	1		elif word[i] == 'P':
134	1		if _before(word, i, {'H'}):
135	1		sdx += '3'
136			else:
137	1		sdx += '1'
138	1		elif word[i] in {'D', 'T'}:
139	1		if _before(word, i, {'C', 'S', 'Z'}):
140	1		sdx += '8'
141			else:
142	1		sdx += '2'
143	1		elif word[i] in {'F', 'V', 'W'}:
144	1		sdx += '3'
145	1		elif word[i] in {'G', 'K', 'Q'}:
146	1		sdx += '4'
147	1		elif word[i] == 'C':
148	1		if _after(word, i, {'S', 'Z'}):
149	1		sdx += '8'
150	1		elif i == 0:
151	1		if _before(
152			word, i, {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
153			):
154	1		sdx += '4'
155			else:
156	1		sdx += '8'
157	1		elif _before(word, i, {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}):
158	1		sdx += '4'
159			else:
160	1		sdx += '8'
161	1		elif word[i] == 'X':
162	1		if _after(word, i, {'C', 'K', 'Q'}):
163	1		sdx += '8'
164			else:
165	1		sdx += '48'
166	1		elif word[i] == 'L':
167	1		sdx += '5'
168	1		elif word[i] in {'M', 'N'}:
169	1		sdx += '6'
170	1		elif word[i] == 'R':
171	1		sdx += '7'
172	1		elif word[i] in {'S', 'Z'}:
173	1		sdx += '8'
174
175	1		sdx = self._delete_consecutive_repeats(sdx)
176
177	1		if sdx:
178	1		sdx = sdx[:1] + sdx[1:].replace('0', '')
179
180	1		return sdx
181
182	1		def _to_alpha(self, num):
183			"""Convert a Kölner Phonetik code from numeric to alphabetic.
184
185			Args:
186			num (str or int): A numeric Kölner Phonetik representation
187
188			Returns:
189			str: An alphabetic representation of the same word
190
191			Examples:
192			>>> pe = Koelner()
193			>>> pe._to_alpha('862')
194			'SNT'
195			>>> pe._to_alpha('657')
196			'NLR'
197			>>> pe._to_alpha('86766')
198			'SNRNN'
199
200			"""
201	1		num = ''.join(c for c in text_type(num) if c in self._num_set)
202	1		return num.translate(self._num_trans)
203
204	1		def encode_alpha(self, word):
205			"""Return the Kölner Phonetik (alphabetic output) code for a word.
206
207			Args:
208			word (str): The word to transform
209
210			Returns:
211			str: The Kölner Phonetik value as an alphabetic string
212
213			Examples:
214			>>> pe = Koelner()
215			>>> pe.encode_alpha('Smith')
216			'SNT'
217			>>> pe.encode_alpha('Schmidt')
218			'SNT'
219			>>> pe.encode_alpha('Müller')
220			'NLR'
221			>>> pe.encode_alpha('Zimmermann')
222			'SNRNN'
223
224			"""
225	1		return koelner_phonetik_num_to_alpha(koelner_phonetik(word))
226
227
228	1		def koelner_phonetik(word):
229			"""Return the Kölner Phonetik (numeric output) code for a word.
230
231			This is a wrapper for :py:meth:`Koelner.encode`.
232
233			Args:
234			word (str): The word to transform
235
236			Returns:
237			str: The Kölner Phonetik value as a numeric string
238
239			Example:
240			>>> koelner_phonetik('Christopher')
241			'478237'
242			>>> koelner_phonetik('Niall')
243			'65'
244			>>> koelner_phonetik('Smith')
245			'862'
246			>>> koelner_phonetik('Schmidt')
247			'862'
248			>>> koelner_phonetik('Müller')
249			'657'
250			>>> koelner_phonetik('Zimmermann')
251			'86766'
252
253			"""
254	1		return Koelner().encode(word)
255
256
257	1		def koelner_phonetik_num_to_alpha(num):
258			"""Convert a Kölner Phonetik code from numeric to alphabetic.
259
260			This is a wrapper for :py:meth:`Koelner._to_alpha`.
261
262			Args:
263			num (str or int): A numeric Kölner Phonetik representation
264
265			Returns:
266			str: An alphabetic representation of the same word
267
268			Examples:
269			>>> koelner_phonetik_num_to_alpha('862')
270			'SNT'
271			>>> koelner_phonetik_num_to_alpha('657')
272			'NLR'
273			>>> koelner_phonetik_num_to_alpha('86766')
274			'SNRNN'
275
276			"""
277	1		return Koelner()._to_alpha(num)
			0 ignored issues – show Coding Style Best Practice introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report It seems like `_to_alpha` was declared protected and should not be accessed from this context. Prefixing a member variable with `_` is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class: class MyParent: def __init__(self): self._x = 1; self.y = 2; class MyChild(MyParent): def some_method(self): return self._x # Ok, since accessed from a child class class AnotherClass: def some_method(self, instance_of_my_child): return instance_of_my_child._x # Would be flagged as AnotherClass is not # a child class of MyParent Loading history...
278
279
280	1		def koelner_phonetik_alpha(word):
281			"""Return the Kölner Phonetik (alphabetic output) code for a word.
282
283			This is a wrapper for :py:meth:`Koelner.encode_alpha`.
284
285			Args:
286			word (str): The word to transform
287
288			Returns:
289			str: The Kölner Phonetik value as an alphabetic string
290
291			Examples:
292			>>> koelner_phonetik_alpha('Smith')
293			'SNT'
294			>>> koelner_phonetik_alpha('Schmidt')
295			'SNT'
296			>>> koelner_phonetik_alpha('Müller')
297			'NLR'
298			>>> koelner_phonetik_alpha('Zimmermann')
299			'SNRNN'
300
301			"""
302	1		return Koelner().encode_alpha(word)
303
304
305			if __name__ == '__main__':
306			import doctest
307
308			doctest.testmod()
309

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._Koelner.Koelner.encode_alpha() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like