Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._roger_root.RogerRoot.encode()   B

Complexity

Conditions 7

Size

Total Lines 60
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 22
nop 4
dl 0
loc 60
ccs 22
cts 22
cp 1
crap 7
rs 7.952
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

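Commonly applied refactorings include Extract Method: moving a coherent part of the method body into a new, well-named method, as described above.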

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._roger_root.
20
21
Roger Root phonetic algorithm
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34 1
from six.moves import range
35
36 1
from ._phonetic import _Phonetic
37
38 1
__all__ = ['RogerRoot', 'roger_root']
39
40
41 1
class RogerRoot(_Phonetic):
    """Roger Root code.

    This is Roger Root name coding, described in :cite:`Moore:1977`.

    Encoding is table driven. ``_init_patterns`` is consulted once for the
    start of the word and ``_med_patterns`` for every position after that.
    Both tables are keyed by pattern length (4 down to 1) so that the longest
    matching letter group always wins.

    ``self._uc_set`` and ``self._delete_consecutive_repeats`` are not defined
    in this class; they are presumably inherited from ``_Phonetic``.
    """

    # Word-initial patterns, keyed by pattern length.
    # '*' is used to prevent combining by _delete_consecutive_repeats()
    _init_patterns = {
        4: {'TSCH': '06'},
        3: {'TSH': '06', 'SCH': '06'},
        2: {
            'CE': '0*0',
            'CH': '06',
            'CI': '0*0',
            'CY': '0*0',
            'DG': '07',
            'GF': '08',
            'GM': '03',
            'GN': '02',
            'KN': '02',
            'PF': '08',
            'PH': '08',
            'PN': '02',
            'SH': '06',
            'TS': '0*0',
            'WR': '04',
        },
        1: {
            'A': '1',
            'B': '09',
            'C': '07',
            'D': '01',
            'E': '1',
            'F': '08',
            'G': '07',
            'H': '2',
            'I': '1',
            'J': '3',
            'K': '07',
            'L': '05',
            'M': '03',
            'N': '02',
            'O': '1',
            'P': '09',
            'Q': '07',
            'R': '04',
            'S': '0*0',
            'T': '01',
            'U': '1',
            'V': '08',
            'W': '4',
            'X': '07',
            'Y': '5',
            'Z': '0*0',
        },
    }

    # Patterns for every position after the initial one, keyed by pattern
    # length. Letters mapped to '*' contribute no digit (the '*' is stripped
    # at the end of encode()) but still separate otherwise-adjacent equal
    # digits when consecutive repeats are removed.
    _med_patterns = {
        4: {'TSCH': '6'},
        3: {'TSH': '6', 'SCH': '6'},
        2: {
            'CE': '0',
            'CH': '6',
            'CI': '0',
            'CY': '0',
            'DG': '7',
            'PH': '8',
            'SH': '6',
            'TS': '0',
        },
        1: {
            'B': '9',
            'C': '7',
            'D': '1',
            'F': '8',
            'G': '7',
            'J': '6',
            'K': '7',
            'L': '5',
            'M': '3',
            'N': '2',
            'P': '9',
            'Q': '7',
            'R': '4',
            'S': '0',
            'T': '1',
            'V': '8',
            'X': '7',
            'Z': '0',
            'A': '*',
            'E': '*',
            'H': '*',
            'I': '*',
            'O': '*',
            'U': '*',
            'W': '*',
            'Y': '*',
        },
    }

    @staticmethod
    def _longest_pattern_at(word, pos, patterns):
        """Return the code for the longest pattern starting at ``pos``.

        Parameters
        ----------
        word : str
            The (already normalized) word being encoded
        pos : int
            The index in ``word`` at which to look for a pattern
        patterns : dict
            A table mapping pattern length (4, 3, 2, 1) to a dict of
            ``{letters: code}``

        Returns
        -------
        tuple
            ``(code, length)`` for the longest matching pattern, or
            ``('', 0)`` if nothing in ``patterns`` matches at ``pos``

        """
        for num in range(4, 0, -1):
            chunk = word[pos : pos + num]
            if chunk in patterns[num]:
                return patterns[num][chunk], num
        return '', 0

    # NOTE(review): the static analysis attached to this listing reports that
    # these parameters differ from the overridden ``encode`` method. The base
    # class is not visible here, and the signature is kept as-is for callers.
    def encode(self, word, max_length=5, zero_pad=True):
        """Return the Roger Root code for a word.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The maximum length (default 5) of the code to return
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string

        Returns
        -------
        str
            The Roger Root code

        Examples
        --------
        >>> pe = RogerRoot()
        >>> pe.encode('Christopher')
        '06401'
        >>> pe.encode('Niall')
        '02500'
        >>> pe.encode('Smith')
        '00310'
        >>> pe.encode('Schmidt')
        '06310'

        """
        # uppercase, normalize, decompose, and keep only the characters in
        # self._uc_set (per the original comment: filter non-A-Z out)
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')
        word = ''.join(c for c in word if c in self._uc_set)

        # Do first digit(s) first; an empty word yields ('', 0) here.
        piece, pos = self._longest_pattern_at(word, 0, self._init_patterns)
        pieces = [piece]

        # Then code subsequent digits
        while pos < len(word):
            piece, used = self._longest_pattern_at(
                word, pos, self._med_patterns
            )
            pieces.append(piece)
            # Defensive: _med_patterns[1] covers A-Z, but _uc_set is defined
            # outside this class. If it ever admits another character, skip
            # that character instead of looping forever on the same position.
            pos += used if used else 1

        # Collapse runs of the same symbol first, then drop the '*'
        # separators that kept selected digits from being merged.
        code = self._delete_consecutive_repeats(''.join(pieces))
        code = code.replace('*', '')

        if zero_pad:
            # Over-pad and let the final slice trim to max_length.
            code += '0' * max_length

        return code[:max_length]
201
202
203 1
def roger_root(word, max_length=5, zero_pad=True):
    """Compute the Roger Root code of a word.

    Convenience function that builds a :py:class:`RogerRoot` instance and
    delegates to :py:meth:`RogerRoot.encode`.

    Parameters
    ----------
    word : str
        The word to encode
    max_length : int
        Upper bound (default 5) on the length of the returned code
    zero_pad : bool
        If True, right-pad the code with 0s so that it is max_length
        characters long

    Returns
    -------
    str
        The Roger Root code for ``word``

    Examples
    --------
    >>> roger_root('Christopher')
    '06401'
    >>> roger_root('Niall')
    '02500'
    >>> roger_root('Smith')
    '00310'
    >>> roger_root('Schmidt')
    '06310'

    """
    encoder = RogerRoot()
    return encoder.encode(word, max_length=max_length, zero_pad=zero_pad)
235
236
237
if __name__ == '__main__':
    # When run as a script, execute the docstring examples in this module.
    from doctest import testmod

    testmod()
241