Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._russell_index   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 268
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 52
dl 0
loc 268
ccs 38
cts 38
cp 1
rs 10
c 0
b 0
f 0
wmc 10

3 Functions

Rating   Name   Duplication   Size   Complexity  
A russell_index_num_to_alpha() 0 26 1
A russell_index() 0 28 1
A russell_index_alpha() 0 28 1

3 Methods

Rating   Name   Duplication   Size   Complexity  
A RussellIndex.encode_alpha() 0 32 2
A RussellIndex._to_alpha() 0 31 2
A RussellIndex.encode() 0 45 3
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._russell_index.
20
21
Robert C. Russell's Index
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._phonetic import _Phonetic
36
37 1
# Names exported by ``from ... import *``.
__all__ = [
    'RussellIndex',
    'russell_index',
    'russell_index_alpha',
    'russell_index_num_to_alpha',
]
43
44
45 1
class RussellIndex(_Phonetic):
    """Russell Index.

    This follows Robert C. Russell's Index algorithm, as described in
    :cite:`Russell:1917`.
    """

    # Letters that take part in the encoding; any other character is dropped
    # before translation.
    _uc_set = set('ABCDEFGIKLMNOPQRSTUVXYZ')

    # Code point of each retained letter -> its Russell digit.
    _trans = dict(
        zip(
            (ord(letter) for letter in 'ABCDEFGIKLMNOPQRSTUVXYZ'),
            '12341231356712383412313',
        )
    )
    # Code point of each Russell digit -> its alphabetic representative.
    _num_trans = dict(
        zip((ord(digit) for digit in '12345678'), 'ABCDLMNR')
    )

    # Digits that can occur in a Russell Index value.
    _num_set = set('12345678')

    def encode(self, word):
        """Return the Russell Index (integer output) of a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        int
            The Russell Index value; ``float('NaN')`` is returned instead
            when no encodable letter remains after filtering

        Examples
        --------
        >>> pe = RussellIndex()
        >>> pe.encode('Christopher')
        3813428
        >>> pe.encode('Niall')
        715
        >>> pe.encode('Smith')
        3614
        >>> pe.encode('Schmidt')
        3614

        """
        word = unicode_normalize('NFKD', text_type(word.upper()))
        word = word.replace('ß', 'SS')
        word = word.replace('GH', '')  # discard gh (rule 3)
        # discard /[sz]$/ (rule 3); rstrip removes the whole trailing run
        word = word.rstrip('SZ')

        # translate according to Russell's mapping
        word = ''.join(c for c in word if c in self._uc_set)
        sdx = word.translate(self._trans)

        # remove any 1s after the first occurrence
        # (find() yields -1 when there is no '1', which makes ``one`` falsy)
        one = sdx.find('1') + 1
        if one:
            sdx = sdx[:one] + sdx[one:].replace('1', '')

        # remove repeating characters (helper is defined outside this class
        # body, presumably on _Phonetic)
        sdx = self._delete_consecutive_repeats(sdx)

        # return as an int
        return int(sdx) if sdx else float('NaN')

    def _to_alpha(self, num):
        """Convert the Russell Index integer to an alphabetic string.

        This follows Robert C. Russell's Index algorithm, as described in
        :cite:`Russell:1917`.

        Parameters
        ----------
        num : int
            A Russell Index integer value

        Returns
        -------
        str
            The Russell Index as an alphabetic string; empty when ``num``
            contains none of the digits 1-8

        Examples
        --------
        >>> pe = RussellIndex()
        >>> pe._to_alpha(3813428)
        'CRACDBR'
        >>> pe._to_alpha(715)
        'NAL'
        >>> pe._to_alpha(3614)
        'CMAD'

        """
        # Keep only valid Russell digits; anything else in the textual form
        # of ``num`` is discarded.
        digits = ''.join(c for c in text_type(num) if c in self._num_set)
        # Translating an empty string yields an empty string, so no separate
        # empty-input branch is needed.
        return digits.translate(self._num_trans)

    def encode_alpha(self, word):
        """Return the Russell Index (alphabetic output) for the word.

        This follows Robert C. Russell's Index algorithm, as described in
        :cite:`Russell:1917`.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Russell Index value as an alphabetic string; empty for a
            falsy ``word``

        Examples
        --------
        >>> pe = RussellIndex()
        >>> pe.encode_alpha('Christopher')
        'CRACDBR'
        >>> pe.encode_alpha('Niall')
        'NAL'
        >>> pe.encode_alpha('Smith')
        'CMAD'
        >>> pe.encode_alpha('Schmidt')
        'CMAD'

        """
        if word:
            return self._to_alpha(self.encode(word))
        return ''
174
175
176 1
def russell_index(word):
    """Return the Russell Index (integer output) of a word.

    This is a wrapper for :py:meth:`RussellIndex.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    int
        The Russell Index value

    Examples
    --------
    >>> russell_index('Christopher')
    3813428
    >>> russell_index('Niall')
    715
    >>> russell_index('Smith')
    3614
    >>> russell_index('Schmidt')
    3614

    """
    return RussellIndex().encode(word)
204
205
206 1
def russell_index_num_to_alpha(num):
    """Convert the Russell Index integer to an alphabetic string.

    This is a wrapper for :py:meth:`RussellIndex._to_alpha`.

    Parameters
    ----------
    num : int
        A Russell Index integer value

    Returns
    -------
    str
        The Russell Index as an alphabetic string

    Examples
    --------
    >>> russell_index_num_to_alpha(3813428)
    'CRACDBR'
    >>> russell_index_num_to_alpha(715)
    'NAL'
    >>> russell_index_num_to_alpha(3614)
    'CMAD'

    """
    # Deliberate use of the non-public method: this function is its
    # module-level wrapper.
    return RussellIndex()._to_alpha(num)
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _to_alpha was declared protected and should not be accessed from this context.

Prefixing a member variable with _ is usually regarded as the equivalent of declaring it with the protected visibility that exists in other languages. Consequently, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
232
233
234 1
def russell_index_alpha(word):
    """Return the Russell Index (alphabetic output) for the word.

    This is a wrapper for :py:meth:`RussellIndex.encode_alpha`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Russell Index value as an alphabetic string

    Examples
    --------
    >>> russell_index_alpha('Christopher')
    'CRACDBR'
    >>> russell_index_alpha('Niall')
    'NAL'
    >>> russell_index_alpha('Smith')
    'CMAD'
    >>> russell_index_alpha('Schmidt')
    'CMAD'

    """
    return RussellIndex().encode_alpha(word)
262
263
264
if __name__ == '__main__':
    # Run the docstring examples in this module when executed as a script.
    import doctest

    doctest.testmod()
268