Issues in _koelner.py (master) - Issues in master - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Issues (140)

abydos/phonetic/_koelner.py (2 issues)

Labels

Severity

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._koelner.

Kölner Phonetik
"""

from typing import Set
from unicodedata import normalize as unicode_normalize

from ._phonetic import _Phonetic

__all__ = [
    'Koelner',
]


class Koelner(_Phonetic):
    """Kölner Phonetik.

    Based on the algorithm defined by :cite:`Postel:1969`.

    .. versionadded:: 0.3.6
    """

    # Letters that receive the code '0' (the vowels plus J and Y).
    _uc_v_set = set('AEIOUJY')

    # Maps the code point of each code digit to the letter that
    # encode_alpha substitutes for it.
    _num_trans = {
        ord(digit): letter
        for digit, letter in zip('012345678', 'APTFKLNRS')
    }

    # Digits that may appear in a numeric code.
    _num_set = set('012345678')

    def encode(self, word: str) -> str:
        """Return the Kölner Phonetik (numeric output) code for a word.

        While the output code is numeric, it is still a str because 0s can lead
        the code.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as a numeric string; the empty string
            if no codable letter remains after filtering

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode('Christopher')
        '478237'
        >>> pe.encode('Niall')
        '65'
        >>> pe.encode('Smith')
        '862'
        >>> pe.encode('Schmidt')
        '862'
        >>> pe.encode('Müller')
        '657'
        >>> pe.encode('Zimmermann')
        '86766'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """

        def _after(word: str, pos: int, letters: Set[str]) -> bool:
            """Return True if word[pos] follows one of the supplied letters.

            Parameters
            ----------
            word : str
                The word to check
            pos : int
                Position within word to check
            letters : {str}
                Letters to confirm precede word[pos]

            Returns
            -------
            bool
                True if word[pos] follows a value in letters; always False
                for the first position

            .. versionadded:: 0.1.0

            """
            return pos > 0 and word[pos - 1] in letters

        def _before(word: str, pos: int, letters: Set[str]) -> bool:
            """Return True if word[pos] precedes one of the supplied letters.

            Parameters
            ----------
            word : str
                The word to check
            pos : int
                Position within word to check
            letters : {str}
                Letters to confirm follow word[pos]

            Returns
            -------
            bool
                True if word[pos] precedes a value in letters; always False
                for the last position

            .. versionadded:: 0.1.0

            """
            return pos + 1 < len(word) and word[pos + 1] in letters

        # Expand the German umlauts while they are still single code points:
        # NFKD splits a precomposed umlaut into its base letter plus a
        # combining diaeresis, after which these replacements cannot match.
        word = word.upper()
        for umlaut, expansion in (('Ä', 'AE'), ('Ö', 'OE'), ('Ü', 'UE')):
            word = word.replace(umlaut, expansion)
        word = unicode_normalize('NFKD', word)

        # Keep only the characters listed in the inherited _uc_set
        # (presumably the uppercase letters A-Z -- defined on the base class).
        word = ''.join(c for c in word if c in self._uc_set)

        # Nothing to convert, return base case
        if not word:
            return ''

        # Letters after which C is coded 4; the word-initial rule
        # additionally accepts L and R.
        c_initial_followers = {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
        c_inner_followers = {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}
        sibilants = {'S', 'Z'}
        sibilant_followers = {'C', 'S', 'Z'}
        velars = {'C', 'K', 'Q'}

        codes = []
        for i, char in enumerate(word):
            if char in self._uc_v_set:
                codes.append('0')
            elif char == 'B':
                codes.append('1')
            elif char == 'P':
                # PH is coded like F
                codes.append('3' if _before(word, i, {'H'}) else '1')
            elif char in {'D', 'T'}:
                codes.append(
                    '8' if _before(word, i, sibilant_followers) else '2'
                )
            elif char in {'F', 'V', 'W'}:
                codes.append('3')
            elif char in {'G', 'K', 'Q'}:
                codes.append('4')
            elif char == 'C':
                followers = (
                    c_initial_followers if i == 0 else c_inner_followers
                )
                if _after(word, i, sibilants):
                    # A preceding S or Z overrides the follower rule
                    codes.append('8')
                elif _before(word, i, followers):
                    codes.append('4')
                else:
                    codes.append('8')
            elif char == 'X':
                # X contributes two digits unless it follows C, K or Q
                codes.append('8' if _after(word, i, velars) else '48')
            elif char == 'L':
                codes.append('5')
            elif char in {'M', 'N'}:
                codes.append('6')
            elif char == 'R':
                codes.append('7')
            elif char in sibilants:
                codes.append('8')
            # Any other letter (e.g. H) contributes no code.

        # Inherited helper; per the doctests above it collapses runs of the
        # same digit ('Niall' -> '65').
        sdx = self._delete_consecutive_repeats(''.join(codes))

        # A '0' is only kept in the leading position.
        return sdx[:1] + sdx[1:].replace('0', '')

    def encode_alpha(self, word: str) -> str:
        """Return the Kölner Phonetik (alphabetic output) code for a word.

        The numeric code from :meth:`encode` is computed first and each digit
        is then replaced by its letter from ``_num_trans``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as an alphabetic string

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'
        >>> pe.encode_alpha('Müller')
        'NLR'
        >>> pe.encode_alpha('Zimmermann')
        'SNRNN'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Drop anything that is not a known code digit before translating.
        num = ''.join(c for c in self.encode(word) if c in self._num_set)
        return num.translate(self._num_trans)


if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()


1			# Copyright 2014-2020 by Christopher C. Little.
2			# This file is part of Abydos.
3			#
4			# Abydos is free software: you can redistribute it and/or modify
5			# it under the terms of the GNU General Public License as published by
6			# the Free Software Foundation, either version 3 of the License, or
7			# (at your option) any later version.
8			#
9			# Abydos is distributed in the hope that it will be useful,
10			# but WITHOUT ANY WARRANTY; without even the implied warranty of
11			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12			# GNU General Public License for more details.
13			#
14			# You should have received a copy of the GNU General Public License
15			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17			"""abydos.phonetic._koelner.
18
19	1		Kölner Phonetik
20			"""
21
22			from typing import Set
23			from unicodedata import normalize as unicode_normalize
24	1
25			from ._phonetic import _Phonetic
26
27			__all__ = [
28			'Koelner',
29			]
30
31	1
32			class Koelner(_Phonetic):
33	1		"""Kölner Phonetik.
34
35	1		Based on the algorithm defined by :cite:`Postel:1969`.
36	1
37			.. versionadded:: 0.3.6
38	1		"""
39	1
40			_uc_v_set = set('AEIOUJY')
41	1
42			_num_trans = dict(zip((ord(_) for _ in '012345678'), 'APTFKLNRS'))
			Issue (Comprehensibility, Best Practice; introduced 2018-11-04 08:02 UTC): The variable `_` does not seem to be defined. (Reviewer note: `_` is bound as the loop variable of the generator expression on the flagged line, so this looks like a false positive of the analyzer.)
43			_num_set = set('012345678')
44
45			def encode(self, word: str) -> str:
46			"""Return the Kölner Phonetik (numeric output) code for a word.
47
48			While the output code is numeric, it is still a str because 0s can lead
49	1		the code.
50
51			Parameters
52			----------
53			word : str
54			The word to transform
55
56			Returns
57	1		-------
58			str
59	1		The Kölner Phonetik value as a numeric string
60	1
61			Example
62	1		-------
63			>>> pe = Koelner()
64			>>> pe.encode('Christopher')
65			'478237'
66			>>> pe.encode('Niall')
67			'65'
68			>>> pe.encode('Smith')
69			'862'
70			>>> pe.encode('Schmidt')
71			'862'
72			>>> pe.encode('Müller')
73			'657'
74			>>> pe.encode('Zimmermann')
75			'86766'
76
77
78			.. versionadded:: 0.1.0
79			.. versionchanged:: 0.3.6
80			Encapsulated in class
81
82			"""
83
84			def _after(word: str, pos: int, letters: Set[str]) -> bool:
85			"""Return True if word[pos] follows one of the supplied letters.
86
87			Parameters
88			----------
89			word : str
90			The word to check
91			pos : int
92			Position within word to check
93			letters : {str}
94			Letters to confirm precede word[pos]
95
96			Returns
97			-------
98			bool
99			True if word[pos] follows a value in letters
100
101	1		.. versionadded:: 0.1.0
102
103			"""
104			return pos > 0 and word[pos - 1] in letters
105
106			def _before(word: str, pos: int, letters: Set[str]) -> bool:
107			"""Return True if word[pos] precedes one of the supplied letters.
108
109			Parameters
110			----------
111			word : str
112			The word to check
113			pos : int
114			Position within word to check
115			letters : {str}
116			Letters to confirm follow word[pos]
117
118			Returns
119			-------
120			bool
121	1		True if word[pos] precedes a value in letters
122
123	1		.. versionadded:: 0.1.0
124
125			"""
126			return pos + 1 < len(word) and word[pos + 1] in letters
127
128			sdx = ''
129
130			word = unicode_normalize('NFKD', word.upper())
131
132			word = word.replace('Ä', 'AE')
133			word = word.replace('Ö', 'OE')
134			word = word.replace('Ü', 'UE')
135			word = ''.join(c for c in word if c in self._uc_set)
136
137			# Nothing to convert, return base case
138			if not word:
139			return sdx
140
141			for i in range(len(word)):
142		View Code Duplication	if word[i] in self._uc_v_set:
			Issue (Duplication; introduced 2018-11-04 08:02 UTC): This code seems to be duplicated in your project.
143	1		sdx += '0'
144			elif word[i] == 'B':
145	1		sdx += '1'
146			elif word[i] == 'P':
147	1		if _before(word, i, {'H'}):
148	1		sdx += '3'
149			else:
150	1		sdx += '1'
151	1		elif word[i] in {'D', 'T'}:
152	1		if _before(word, i, {'C', 'S', 'Z'}):
153	1		sdx += '8'
154			else:
155			sdx += '2'
156	1		elif word[i] in {'F', 'V', 'W'}:
157	1		sdx += '3'
158			elif word[i] in {'G', 'K', 'Q'}:
159	1		sdx += '4'
160	1		elif word[i] == 'C':
161	1		if _after(word, i, {'S', 'Z'}):
162	1		sdx += '8'
163	1		elif i == 0:
164	1		if _before(
165	1		word, i, {'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}
166	1		):
167			sdx += '4'
168	1		else:
169	1		sdx += '8'
170	1		elif _before(word, i, {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}):
171	1		sdx += '4'
172			else:
173	1		sdx += '8'
174	1		elif word[i] == 'X':
175	1		if _after(word, i, {'C', 'K', 'Q'}):
176	1		sdx += '8'
177	1		else:
178	1		sdx += '48'
179	1		elif word[i] == 'L':
180	1		sdx += '5'
181	1		elif word[i] in {'M', 'N'}:
182	1		sdx += '6'
183			elif word[i] == 'R':
184			sdx += '7'
185	1		elif word[i] in {'S', 'Z'}:
186			sdx += '8'
187	1
188	1		sdx = self._delete_consecutive_repeats(sdx)
189	1
190			if sdx:
191	1		sdx = sdx[:1] + sdx[1:].replace('0', '')
192	1
193	1		return sdx
194	1
195			def encode_alpha(self, word: str) -> str:
196	1		"""Return the Kölner Phonetik (alphabetic output) code for a word.
197	1
198	1		Parameters
199	1		----------
200	1		word : str
201	1		The word to transform
202	1
203	1		Returns
204	1		-------
205			str
206	1		The Kölner Phonetik value as an alphabetic string
207
208	1		Examples
209	1		--------
210			>>> pe = Koelner()
211	1		>>> pe.encode_alpha('Smith')
212			'SNT'
213	1		>>> pe.encode_alpha('Schmidt')
214			'SNT'
215			>>> pe.encode_alpha('Müller')
216			'NLR'
217			>>> pe.encode_alpha('Zimmermann')
218			'SNRNN'
219
220
221			.. versionadded:: 0.1.0
222			.. versionchanged:: 0.3.6
223			Encapsulated in class
224
225			"""
226			num = ''.join(c for c in self.encode(word) if c in self._num_set)
227			return num.translate(self._num_trans)
228
229
230			if __name__ == '__main__':
231			import doctest
232
233			doctest.testmod()
234

chrislit / abydos

Issues (140)

abydos/phonetic/_koelner.py (2 issues)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like