abydos.phonetic._koelner.Koelner.encode() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.phonetic._koelner.Koelner.encode() F

↳ Parent: abydos.phonetic._koelner

Complexity

Conditions

Size

Total Lines	141
Code Lines	58

Duplication

Lines	45
Ratio	31.91 %

Code Coverage

Tests	57
CRAP Score	23

Importance

Changes

Metric	Value
cc	23
eloc	58
nop	2
dl	45
loc	141
ccs	57
cts	57
cp	1
crap	23
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._koelner.

Kölner Phonetik
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type
from six.moves import range

from ._phonetic import _Phonetic

# Names exported by ``from ... import *``: the encoder class plus the three
# module-level convenience wrappers defined below.
__all__ = [
    'Koelner',
    'koelner_phonetik',
    'koelner_phonetik_alpha',
    'koelner_phonetik_num_to_alpha',
]


class Koelner(_Phonetic):
    """Kölner Phonetik.

    Based on the algorithm defined by :cite:`Postel:1969`.

    The class offers a numeric encoding (:py:meth:`encode`) and an
    alphabetic rendering of that encoding (:py:meth:`encode_alpha`).
    """

    # Letters treated as vowels by the encoder; each is coded as '0'.
    _uc_v_set = set('AEIOUJY')

    # str.translate table mapping each code digit (by ordinal) to a letter.
    _num_trans = dict(
        zip((ord(digit) for digit in '012345678'), 'APTFKLNRS')
    )

    # The digits that may appear in a Kölner Phonetik code.
    _num_set = set('012345678')

    def encode(self, word):
        """Return the Kölner Phonetik (numeric output) code for a word.

        While the output code is numeric, it is still a str because 0s can lead
        the code.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as a numeric string; the empty string
            if no encodable letters remain after filtering

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode('Christopher')
        '478237'
        >>> pe.encode('Niall')
        '65'
        >>> pe.encode('Smith')
        '862'
        >>> pe.encode('Schmidt')
        '862'
        >>> pe.encode('Müller')
        '657'
        >>> pe.encode('Zimmermann')
        '86766'

        """

        def _after(word, pos, letters):
            """Return True if word[pos] follows one of the supplied letters.

            Parameters
            ----------
            word : str
                The word to check
            pos : int
                Position within word to check
            letters : set of str
                Letters to confirm precede word[pos]

            Returns
            -------
            bool
                True if word[pos] follows a value in letters

            """
            return pos > 0 and word[pos - 1] in letters

        def _before(word, pos, letters):
            """Return True if word[pos] precedes one of the supplied letters.

            Parameters
            ----------
            word : str
                The word to check
            pos : int
                Position within word to check
            letters : set of str
                Letters to confirm follow word[pos]

            Returns
            -------
            bool
                True if word[pos] precedes a value in letters

            """
            return pos + 1 < len(word) and word[pos + 1] in letters

        # Following letters that make 'C' code as '4'; the word-initial
        # context additionally accepts 'L' and 'R'.
        c_initial_context = {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
        c_medial_context = {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}

        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')

        # NOTE(review): NFKD presumably has already decomposed precomposed
        # umlauts by this point, so these replacements may never match --
        # confirm. The resulting code is unaffected either way because every
        # vowel is coded as '0'.
        word = word.replace('Ä', 'AE')
        word = word.replace('Ö', 'OE')
        word = word.replace('Ü', 'UE')

        # Keep only characters in the inherited ``_uc_set`` (defined on
        # _Phonetic, not shown here).
        word = ''.join(c for c in word if c in self._uc_set)

        # Nothing to convert, return base case
        if not word:
            return ''

        digits = []
        for i, char in enumerate(word):
            if char in self._uc_v_set:
                digits.append('0')
            elif char == 'B':
                digits.append('1')
            elif char == 'P':
                # 'PH' is coded like 'F'
                digits.append('3' if _before(word, i, {'H'}) else '1')
            elif char in {'D', 'T'}:
                digits.append(
                    '8' if _before(word, i, {'C', 'S', 'Z'}) else '2'
                )
            elif char in {'F', 'V', 'W'}:
                digits.append('3')
            elif char in {'G', 'K', 'Q'}:
                digits.append('4')
            elif char == 'C':
                if _after(word, i, {'S', 'Z'}):
                    digits.append('8')
                else:
                    context = c_initial_context if i == 0 else c_medial_context
                    digits.append('4' if _before(word, i, context) else '8')
            elif char == 'X':
                # 'X' contributes two digits unless it follows C, K, or Q
                digits.append(
                    '8' if _after(word, i, {'C', 'K', 'Q'}) else '48'
                )
            elif char == 'L':
                digits.append('5')
            elif char in {'M', 'N'}:
                digits.append('6')
            elif char == 'R':
                digits.append('7')
            elif char in {'S', 'Z'}:
                digits.append('8')

        # Inherited helper from _Phonetic (not shown here).
        sdx = self._delete_consecutive_repeats(''.join(digits))

        # A leading '0' is kept; every later '0' is dropped.
        if sdx:
            sdx = sdx[:1] + sdx[1:].replace('0', '')

        return sdx

    def _to_alpha(self, num):
        """Convert a Kölner Phonetik code from numeric to alphabetic.

        Characters of ``num`` that are not one of the digits 0-8 are
        discarded before translation.

        Parameters
        ----------
        num : str or int
            A numeric Kölner Phonetik representation

        Returns
        -------
        str
            An alphabetic representation of the same word

        Examples
        --------
        >>> pe = Koelner()
        >>> pe._to_alpha('862')
        'SNT'
        >>> pe._to_alpha('657')
        'NLR'
        >>> pe._to_alpha('86766')
        'SNRNN'

        """
        num = ''.join(c for c in text_type(num) if c in self._num_set)
        return num.translate(self._num_trans)

    def encode_alpha(self, word):
        """Return the Kölner Phonetik (alphabetic output) code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as an alphabetic string

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'
        >>> pe.encode_alpha('Müller')
        'NLR'
        >>> pe.encode_alpha('Zimmermann')
        'SNRNN'

        """
        # Go through ``self`` rather than the module-level wrappers so that
        # subclass overrides of encode()/_to_alpha() are honoured and no
        # throwaway Koelner instances are created.
        return self._to_alpha(self.encode(word))


def koelner_phonetik(word):
    """Encode a word as its numeric Kölner Phonetik code.

    Convenience wrapper: builds a :py:class:`Koelner` instance and delegates
    to :py:meth:`Koelner.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Kölner Phonetik value as a numeric string

    Examples
    --------
    >>> koelner_phonetik('Christopher')
    '478237'
    >>> koelner_phonetik('Niall')
    '65'
    >>> koelner_phonetik('Smith')
    '862'
    >>> koelner_phonetik('Schmidt')
    '862'
    >>> koelner_phonetik('Müller')
    '657'
    >>> koelner_phonetik('Zimmermann')
    '86766'

    """
    encoder = Koelner()
    return encoder.encode(word)


def koelner_phonetik_num_to_alpha(num):
    """Render a numeric Kölner Phonetik code as letters.

    Convenience wrapper: builds a :py:class:`Koelner` instance and delegates
    to :py:meth:`Koelner._to_alpha`.

    Parameters
    ----------
    num : str or int
        A numeric Kölner Phonetik representation

    Returns
    -------
    str
        An alphabetic representation of the same word

    Examples
    --------
    >>> koelner_phonetik_num_to_alpha('862')
    'SNT'
    >>> koelner_phonetik_num_to_alpha('657')
    'NLR'
    >>> koelner_phonetik_num_to_alpha('86766')
    'SNRNN'

    """
    encoder = Koelner()
    return encoder._to_alpha(num)
class MyParent:
    """Example base class with one underscore-prefixed and one plain attribute."""

    def __init__(self):
        # ``_x`` carries the leading underscore that marks it as protected
        # by convention; ``y`` is an ordinary public attribute.
        self._x = 1
        self.y = 2

class MyChild(MyParent):
    """Example subclass that reads the underscore-prefixed parent attribute."""

    def some_method(self):
        # Ok, since accessed from a child class
        protected_value = self._x
        return protected_value

class AnotherClass:
    """Example unrelated class that reaches into another object's ``_x``."""

    def some_method(self, instance_of_my_child):
        # Would be flagged as AnotherClass is not a child class of MyParent
        protected_value = instance_of_my_child._x
        return protected_value


def koelner_phonetik_alpha(word):
    """Encode a word as its alphabetic Kölner Phonetik code.

    Convenience wrapper: builds a :py:class:`Koelner` instance and delegates
    to :py:meth:`Koelner.encode_alpha`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Kölner Phonetik value as an alphabetic string

    Examples
    --------
    >>> koelner_phonetik_alpha('Smith')
    'SNT'
    >>> koelner_phonetik_alpha('Schmidt')
    'SNT'
    >>> koelner_phonetik_alpha('Müller')
    'NLR'
    >>> koelner_phonetik_alpha('Zimmermann')
    'SNRNN'

    """
    encoder = Koelner()
    return encoder.encode_alpha(word)


# When executed as a script, run the doctest examples embedded in this module.
if __name__ == '__main__':
    from doctest import testmod

    testmod()


1			# -- coding: utf-8 --
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1		"""abydos.phonetic._koelner.
20
21			Kölner Phonetik
22			"""
23
24	1		from __future__ import (
25			absolute_import,
26			division,
27			print_function,
28			unicode_literals,
29			)
30
31	1		from unicodedata import normalize as unicode_normalize
32
33	1		from six import text_type
34	1		from six.moves import range
35
36	1		from ._phonetic import _Phonetic
37
38	1		__all__ = [
39			'Koelner',
40			'koelner_phonetik',
41			'koelner_phonetik_alpha',
42			'koelner_phonetik_num_to_alpha',
43			]
44
45
46	1		class Koelner(_Phonetic):
			0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
47			"""Kölner Phonetik.
48
49			Based on the algorithm defined by :cite:`Postel:1969`.
50			"""
51
52	1		_uc_v_set = set('AEIOUJY')
53
54	1		_num_trans = dict(zip((ord(_) for _ in '012345678'), 'APTFKLNRS'))
			0 ignored issues – show Comprehensibility Best Practice introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
55	1		_num_set = set('012345678')
56
57	1		def encode(self, word):
58			"""Return the Kölner Phonetik (numeric output) code for a word.
59
60			While the output code is numeric, it is still a str because 0s can lead
61			the code.
62
63			Parameters
64			----------
65			word : str
66			The word to transform
67
68			Returns
69			-------
70			str
71			The Kölner Phonetik value as a numeric string
72
73			Example
74			-------
75			>>> pe = Koelner()
76			>>> pe.encode('Christopher')
77			'478237'
78			>>> pe.encode('Niall')
79			'65'
80			>>> pe.encode('Smith')
81			'862'
82			>>> pe.encode('Schmidt')
83			'862'
84			>>> pe.encode('Müller')
85			'657'
86			>>> pe.encode('Zimmermann')
87			'86766'
88
89			"""
90
91	1		def _after(word, pos, letters):
92			"""Return True if word[pos] follows one of the supplied letters.
93
94			Parameters
95			----------
96			word : str
97			The word to check
98			pos : int
99			Position within word to check
100			letters : str
101			Letters to confirm precede word[pos]
102
103			Returns
104			-------
105			bool
106			True if word[pos] follows a value in letters
107
108			"""
109	1		return pos > 0 and word[pos - 1] in letters
110
111	1		def _before(word, pos, letters):
112			"""Return True if word[pos] precedes one of the supplied letters.
113
114			Parameters
115			----------
116			word : str
117			The word to check
118			pos : int
119			Position within word to check
120			letters : str
121			Letters to confirm follow word[pos]
122
123			Returns
124			-------
125			bool
126			True if word[pos] precedes a value in letters
127
128			"""
129	1		return pos + 1 < len(word) and word[pos + 1] in letters
130
131	1		sdx = ''
132
133	1		word = unicode_normalize('NFKD', text_type(word.upper()))
134	1		word = word.replace('ß', 'SS')
135
136	1		word = word.replace('Ä', 'AE')
137	1		word = word.replace('Ö', 'OE')
138	1		word = word.replace('Ü', 'UE')
139	1		word = ''.join(c for c in word if c in self._uc_set)
140
141			# Nothing to convert, return base case
142	1		if not word:
143	1		return sdx
144
145	1		for i in range(len(word)):
			0 ignored issues – show unused-code introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report Consider using enumerate instead of iterating with range and len Loading history...
146	1	View Code Duplication	if word[i] in self._uc_v_set:
			0 ignored issues – show Duplication introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
147	1		sdx += '0'
148	1		elif word[i] == 'B':
149	1		sdx += '1'
150	1		elif word[i] == 'P':
151	1		if _before(word, i, {'H'}):
152	1		sdx += '3'
153			else:
154	1		sdx += '1'
155	1		elif word[i] in {'D', 'T'}:
156	1		if _before(word, i, {'C', 'S', 'Z'}):
157	1		sdx += '8'
158			else:
159	1		sdx += '2'
160	1		elif word[i] in {'F', 'V', 'W'}:
161	1		sdx += '3'
162	1		elif word[i] in {'G', 'K', 'Q'}:
163	1		sdx += '4'
164	1		elif word[i] == 'C':
165	1		if _after(word, i, {'S', 'Z'}):
166	1		sdx += '8'
167	1		elif i == 0:
168	1		if _before(
169			word, i, {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
170			):
171	1		sdx += '4'
172			else:
173	1		sdx += '8'
174	1		elif _before(word, i, {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}):
175	1		sdx += '4'
176			else:
177	1		sdx += '8'
178	1		elif word[i] == 'X':
179	1		if _after(word, i, {'C', 'K', 'Q'}):
180	1		sdx += '8'
181			else:
182	1		sdx += '48'
183	1		elif word[i] == 'L':
184	1		sdx += '5'
185	1		elif word[i] in {'M', 'N'}:
186	1		sdx += '6'
187	1		elif word[i] == 'R':
188	1		sdx += '7'
189	1		elif word[i] in {'S', 'Z'}:
190	1		sdx += '8'
191
192	1		sdx = self._delete_consecutive_repeats(sdx)
193
194	1		if sdx:
195	1		sdx = sdx[:1] + sdx[1:].replace('0', '')
196
197	1		return sdx
198
199	1		def _to_alpha(self, num):
200			"""Convert a Kölner Phonetik code from numeric to alphabetic.
201
202			Parameters
203			----------
204			num : str or int
205			A numeric Kölner Phonetik representation
206
207			Returns
208			-------
209			str
210			An alphabetic representation of the same word
211
212			Examples
213			--------
214			>>> pe = Koelner()
215			>>> pe._to_alpha('862')
216			'SNT'
217			>>> pe._to_alpha('657')
218			'NLR'
219			>>> pe._to_alpha('86766')
220			'SNRNN'
221
222			"""
223	1		num = ''.join(c for c in text_type(num) if c in self._num_set)
224	1		return num.translate(self._num_trans)
225
226	1		def encode_alpha(self, word):
227			"""Return the Kölner Phonetik (alphabetic output) code for a word.
228
229			Parameters
230			----------
231			word : str
232			The word to transform
233
234			Returns
235			-------
236			str
237			The Kölner Phonetik value as an alphabetic string
238
239			Examples
240			--------
241			>>> pe = Koelner()
242			>>> pe.encode_alpha('Smith')
243			'SNT'
244			>>> pe.encode_alpha('Schmidt')
245			'SNT'
246			>>> pe.encode_alpha('Müller')
247			'NLR'
248			>>> pe.encode_alpha('Zimmermann')
249			'SNRNN'
250
251			"""
252	1		return koelner_phonetik_num_to_alpha(koelner_phonetik(word))
253
254
255	1		def koelner_phonetik(word):
256			"""Return the Kölner Phonetik (numeric output) code for a word.
257
258			This is a wrapper for :py:meth:`Koelner.encode`.
259
260			Parameters
261			----------
262			word : str
263			The word to transform
264
265			Returns
266			-------
267			str
268			The Kölner Phonetik value as a numeric string
269
270			Example
271			-------
272			>>> koelner_phonetik('Christopher')
273			'478237'
274			>>> koelner_phonetik('Niall')
275			'65'
276			>>> koelner_phonetik('Smith')
277			'862'
278			>>> koelner_phonetik('Schmidt')
279			'862'
280			>>> koelner_phonetik('Müller')
281			'657'
282			>>> koelner_phonetik('Zimmermann')
283			'86766'
284
285			"""
286	1		return Koelner().encode(word)
287
288
289	1		def koelner_phonetik_num_to_alpha(num):
290			"""Convert a Kölner Phonetik code from numeric to alphabetic.
291
292			This is a wrapper for :py:meth:`Koelner._to_alpha`.
293
294			Parameters
295			----------
296			num : str or int
297			A numeric Kölner Phonetik representation
298
299			Returns
300			-------
301			str
302			An alphabetic representation of the same word
303
304			Examples
305			--------
306			>>> koelner_phonetik_num_to_alpha('862')
307			'SNT'
308			>>> koelner_phonetik_num_to_alpha('657')
309			'NLR'
310			>>> koelner_phonetik_num_to_alpha('86766')
311			'SNRNN'
312
313			"""
314	1		return Koelner()._to_alpha(num)
			0 ignored issues – show Coding Style Best Practice introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report It seems like `_to_alpha` was declared protected and should not be accessed from this context. Prefixing a member variable with `_` is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:
			class MyParent:
			    def __init__(self):
			        self._x = 1;
			        self.y = 2;
			class MyChild(MyParent):
			    def some_method(self):
			        return self._x    # Ok, since accessed from a child class
			class AnotherClass:
			    def some_method(self, instance_of_my_child):
			        return instance_of_my_child._x   # Would be flagged as AnotherClass is not a child class of MyParent
			Loading history...
315
316
317	1		def koelner_phonetik_alpha(word):
318			"""Return the Kölner Phonetik (alphabetic output) code for a word.
319
320			This is a wrapper for :py:meth:`Koelner.encode_alpha`.
321
322			Parameters
323			----------
324			word : str
325			The word to transform
326
327			Returns
328			-------
329			str
330			The Kölner Phonetik value as an alphabetic string
331
332			Examples
333			--------
334			>>> koelner_phonetik_alpha('Smith')
335			'SNT'
336			>>> koelner_phonetik_alpha('Schmidt')
337			'SNT'
338			>>> koelner_phonetik_alpha('Müller')
339			'NLR'
340			>>> koelner_phonetik_alpha('Zimmermann')
341			'SNRNN'
342
343			"""
344	1		return Koelner().encode_alpha(word)
345
346
347			if __name__ == '__main__':
348			import doctest
349
350			doctest.testmod()
351

chrislit / abydos

Push — master ( f43547...71985b )

abydos.phonetic._koelner.Koelner.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like