abydos.phonetic._koelner   A
last analyzed

Complexity

Total Complexity 24

Size/Duplication

Total Lines 234
Duplicated Lines 19.23 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 24
eloc 73
dl 45
loc 234
ccs 71
cts 71
cp 1
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A Koelner.encode_alpha() 0 33 1
F Koelner.encode() 45 149 23

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
# Copyright 2014-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.phonetic._koelner.
18
19 1
Kölner Phonetik
20
"""
21
22
from typing import Set
23
from unicodedata import normalize as unicode_normalize
24 1
25
from ._phonetic import _Phonetic
26
27
# Public API of this module.
__all__ = ['Koelner']
30
31 1
32
class Koelner(_Phonetic):
    """Kölner Phonetik.

    Based on the algorithm defined by :cite:`Postel:1969`.

    .. versionadded:: 0.3.6
    """

    # Letters that are coded as '0' (the vowels plus J and Y).
    _uc_v_set = set('AEIOUJY')

    # Maps the code point of each digit to the letter used by encode_alpha.
    _num_trans = {
        ord(digit): letter
        for digit, letter in zip('012345678', 'APTFKLNRS')
    }
    # Digits that may appear in a numeric code.
    _num_set = set('012345678')

    def encode(self, word: str) -> str:
        """Return the Kölner Phonetik (numeric output) code for a word.

        While the output code is numeric, it is still a str because 0s can lead
        the code.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as a numeric string; the empty string
            if no encodable letters remain after filtering

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode('Christopher')
        '478237'
        >>> pe.encode('Niall')
        '65'
        >>> pe.encode('Smith')
        '862'
        >>> pe.encode('Schmidt')
        '862'
        >>> pe.encode('Müller')
        '657'
        >>> pe.encode('Zimmermann')
        '86766'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Expand German umlauts *before* NFKD normalization: NFKD splits
        # e.g. 'Ä' into 'A' plus a combining diaeresis, after which the
        # precomposed literals below could no longer match.
        word = word.upper()
        word = word.replace('Ä', 'AE').replace('Ö', 'OE').replace('Ü', 'UE')
        word = unicode_normalize('NFKD', word)

        # Keep only characters from the inherited _uc_set
        # (presumably the uppercase letters A-Z; confirm in _Phonetic).
        word = ''.join(c for c in word if c in self._uc_set)

        # Nothing to convert, return base case
        if not word:
            return ''

        last = len(word) - 1
        codes = []
        for i, char in enumerate(word):
            # Neighbouring letters; '' at the word boundaries never matches
            # any of the context sets below.
            prev_char = word[i - 1] if i > 0 else ''
            next_char = word[i + 1] if i < last else ''

            if char in self._uc_v_set:
                codes.append('0')
            elif char == 'B':
                codes.append('1')
            elif char == 'P':
                # 'PH' is coded like F
                codes.append('3' if next_char == 'H' else '1')
            elif char in {'D', 'T'}:
                codes.append('8' if next_char in {'C', 'S', 'Z'} else '2')
            elif char in {'F', 'V', 'W'}:
                codes.append('3')
            elif char in {'G', 'K', 'Q'}:
                codes.append('4')
            elif char == 'C':
                if i == 0:
                    # Word-initial C additionally accepts L and R
                    hard_followers = {
                        'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'
                    }
                else:
                    hard_followers = {'A', 'H', 'K', 'O', 'Q', 'U', 'X'}

                if prev_char in {'S', 'Z'}:
                    codes.append('8')
                elif next_char in hard_followers:
                    codes.append('4')
                else:
                    codes.append('8')
            elif char == 'X':
                # X contributes two digits unless it follows C, K or Q
                codes.append('8' if prev_char in {'C', 'K', 'Q'} else '48')
            elif char == 'L':
                codes.append('5')
            elif char in {'M', 'N'}:
                codes.append('6')
            elif char == 'R':
                codes.append('7')
            elif char in {'S', 'Z'}:
                codes.append('8')
            # Any other letter (e.g. H) contributes nothing to the code.

        sdx = self._delete_consecutive_repeats(''.join(codes))

        # Drop every '0' except one in the leading position
        if sdx:
            sdx = sdx[:1] + sdx[1:].replace('0', '')

        return sdx

    def encode_alpha(self, word: str) -> str:
        """Return the Kölner Phonetik (alphabetic output) code for a word.

        The numeric code from :meth:`encode` is translated digit by digit
        into letters using ``_num_trans``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Kölner Phonetik value as an alphabetic string

        Examples
        --------
        >>> pe = Koelner()
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'
        >>> pe.encode_alpha('Müller')
        'NLR'
        >>> pe.encode_alpha('Zimmermann')
        'SNRNN'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        digits = (d for d in self.encode(word) if d in self._num_set)
        return ''.join(digits).translate(self._num_trans)
228
229
230
if __name__ == '__main__':
    # Run the doctests embedded in this module's docstrings.
    from doctest import testmod

    testmod()
234