Completed
Push — master ( 6ed6e1...91db7a )
by Chris
13:26
created

abydos.phonetic.russell.russell_index_alpha()   A

Complexity

Conditions 2

Size

Total Lines 22
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nop 1
dl 0
loc 22
ccs 4
cts 4
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic.russell.
20
21
The phonetic.russell module implements Robert C. Russell's Index.
22
"""
23
24 1
from __future__ import unicode_literals
25
26 1
from unicodedata import normalize as unicode_normalize
27
28 1
from six import text_type
29
30 1
from . import _delete_consecutive_repeats
31
32 1
__all__ = [
33
    'russell_index',
34
    'russell_index_alpha',
35
    'russell_index_num_to_alpha',
36
]
37
38
39 1
def russell_index(word):
    """Compute the numeric Russell Index of a word.

    Implements Robert C. Russell's Index algorithm, as described in
    :cite:`Russell:1917`.

    The word is upper-cased and NFKD-normalized, 'GH' sequences and
    trailing 'S'/'Z' characters are discarded, every remaining letter that
    has an entry in Russell's table is replaced by its digit, all '1'
    digits after the first one are dropped, and runs of repeated digits
    are collapsed.

    :param str word: the word to transform
    :returns: the Russell Index value; a float NaN is returned instead
        when no encodable letter is left in the word
    :rtype: int

    >>> russell_index('Christopher')
    3813428
    >>> russell_index('Niall')
    715
    >>> russell_index('Smith')
    3614
    >>> russell_index('Schmidt')
    3614
    """
    # Russell's letter -> digit table, keyed by code point
    digit_for = {
        ord(letter): digit
        for letter, digit in zip(
            'ABCDEFGIKLMNOPQRSTUVXYZ', '12341231356712383412313'
        )
    }

    cleaned = unicode_normalize('NFKD', text_type(word.upper()))
    cleaned = cleaned.replace('ß', 'SS')
    cleaned = cleaned.replace('GH', '')  # discard gh (rule 3)
    cleaned = cleaned.rstrip('SZ')  # discard /[sz]$/ (rule 3)

    # keep only the letters Russell's mapping knows about, already encoded
    code = ''.join(
        digit_for[ord(char)] for char in cleaned if ord(char) in digit_for
    )

    # keep the first '1' (if any) and drop every later one
    head, first_one, tail = code.partition('1')
    code = head + first_one + tail.replace('1', '')

    # collapse runs of identical digits
    code = _delete_consecutive_repeats(code)

    # nothing encodable: signal it with NaN rather than an integer
    if not code:
        return float('NaN')
    return int(code)
113
114
115 1
def russell_index_num_to_alpha(num):
    """Render a numeric Russell Index as its alphabetic form.

    Implements Robert C. Russell's Index algorithm, as described in
    :cite:`Russell:1917`.

    The value is converted to text, every character other than the digits
    1 through 8 is dropped, and each remaining digit is replaced by its
    letter. If no such digit is present, the empty string is returned.

    :param int num: a Russell Index integer value
    :returns: the Russell Index as an alphabetic string
    :rtype: str

    >>> russell_index_num_to_alpha(3813428)
    'CRACDBR'
    >>> russell_index_num_to_alpha(715)
    'NAL'
    >>> russell_index_num_to_alpha(3614)
    'CMAD'
    """
    # digit -> letter table, keyed by code point so str.translate can use it
    letter_for = {
        ord(digit): letter for digit, letter in zip('12345678', 'ABCDLMNR')
    }
    digits = ''.join(
        char for char in text_type(num) if ord(char) in letter_for
    )
    # translating an empty string yields an empty string
    return digits.translate(letter_for)
143
144
145 1
def russell_index_alpha(word):
    """Compute the alphabetic Russell Index of a word.

    Implements Robert C. Russell's Index algorithm, as described in
    :cite:`Russell:1917`.

    The numeric index is obtained from russell_index() and converted to
    letters by russell_index_num_to_alpha(). A falsy word (such as the
    empty string) yields the empty string without calling either.

    :param str word: the word to transform
    :returns: the Russell Index value as an alphabetic string
    :rtype: str

    >>> russell_index_alpha('Christopher')
    'CRACDBR'
    >>> russell_index_alpha('Niall')
    'NAL'
    >>> russell_index_alpha('Smith')
    'CMAD'
    >>> russell_index_alpha('Schmidt')
    'CMAD'
    """
    if not word:
        return ''
    numeric_index = russell_index(word)
    return russell_index_num_to_alpha(numeric_index)
167
168
169
# Run the docstring examples when the module is executed as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()
173