abydos.phonetic._phonex   A
last analyzed

Complexity

Total Complexity 35

Size/Duplication

Total Lines 210
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 35
eloc 74
dl 0
loc 210
ccs 67
cts 67
cp 1
rs 9.6
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
F Phonex.encode() 0 111 32
A Phonex.__init__() 0 21 2
A Phonex.encode_alpha() 0 31 1
1
# Copyright 2014-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.phonetic._phonex.

Phonex
"""
21
22
from unicodedata import normalize as unicode_normalize
23
24 1
from ._phonetic import _Phonetic
25
26
__all__ = ['Phonex']
27
28
29
class Phonex(_Phonetic):
    """Phonex code.

    Phonex is an algorithm derived from Soundex, defined in :cite:`Lait:1996`.

    .. versionadded:: 0.3.6
    """

    # Translation table (as consumed by ``str.translate``) mapping the digits
    # of a Phonex code to the letters used in its alphabetic form.
    _alphabetic = {
        ord(digit): letter for digit, letter in zip('123456', 'PSTLNR')
    }

    def __init__(self, max_length: int = 4, zero_pad: bool = True) -> None:
        """Initialize Phonex instance.

        Parameters
        ----------
        max_length : int
            The length of the code returned (defaults to 4). Values are
            clamped to the range [4, 64]; -1 selects the maximum of 64.
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string


        .. versionadded:: 0.4.0

        """
        # Clamp max_length to [4, 64]
        if max_length != -1:
            self._max_length = min(max(4, max_length), 64)
        else:
            self._max_length = 64
        self._zero_pad = zero_pad

    def encode_alpha(self, word: str) -> str:
        """Return the alphabetic Phonex code for a word.

        The numeric code from :meth:`encode` is stripped of its trailing
        zeros and every digit after the first character is replaced by a
        letter.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic Phonex value

        Examples
        --------
        >>> pe = Phonex()
        >>> pe.encode_alpha('Christopher')
        'CRST'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SSNT'


        .. versionadded:: 0.4.0

        """
        code = self.encode(word).rstrip('0')
        # The first character is kept verbatim; only the remainder is
        # translated from digits to letters.
        return code[:1] + code[1:].translate(self._alphabetic)

    def _phonex_prepare(self, word: str) -> str:
        """Return the upper-cased, pre-processed form of a word.

        The word is upper-cased and NFKD-normalized, trailing Ss are removed,
        leading KN/PH/WR are reduced to N/F/R, a leading H is dropped, and
        the first letter is replaced by the representative of its group.

        Parameters
        ----------
        word : str
            The word to pre-process

        Returns
        -------
        str
            The pre-processed name (possibly empty)

        """
        name = unicode_normalize('NFKD', word.upper())

        # Remove any trailing Ss
        name = name.rstrip('S')

        # Phonetic equivalents of first 2 characters
        if name[:2] == 'KN':
            name = 'N' + name[2:]  # KN.. == N..
        elif name[:2] == 'PH':
            name = 'F' + name[2:]  # PH.. == F.. (H ignored anyway)
        elif name[:2] == 'WR':
            name = 'R' + name[2:]  # WR.. == R..

        # Special case, ignore H first letter (subsequent Hs are given no
        # code by the main loop anyway)
        if name[:1] == 'H':
            name = name[1:]

        if name:
            # Phonetic equivalents of first character
            first = name[0]
            if first in self._uc_vy_set:
                first = 'A'
            elif first == 'P':
                first = 'B'
            elif first == 'V':
                first = 'F'
            elif first in {'K', 'Q'}:
                first = 'C'
            elif first == 'J':
                first = 'G'
            elif first == 'Z':
                first = 'S'
            name = first + name[1:]

        return name

    def _phonex_code_at(self, name: str, i: int) -> str:
        """Return the modified Soundex digit for the letter at position i.

        Parameters
        ----------
        name : str
            The pre-processed name
        i : int
            The index of the letter to encode

        Returns
        -------
        str
            A digit from '1' to '6', or '0' if the letter is not encoded

        """
        letter = name[i]
        following = name[i + 1 : i + 2]

        if letter in {'B', 'F', 'P', 'V'}:
            return '1'
        if letter in {'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z'}:
            return '2'
        if letter in {'D', 'T'}:
            # D and T are not encoded when directly followed by C
            return '3' if following != 'C' else '0'
        if letter in {'M', 'N'}:
            return '5'
        if letter in {'L', 'R'}:
            # L and R are only encoded before a member of _uc_vy_set or at
            # the very end of the name
            if following in self._uc_vy_set or i + 1 == len(name):
                return '4' if letter == 'L' else '6'
        return '0'

    def encode(self, word: str) -> str:
        """Return the Phonex code for a word.

        The result consists of the (normalized) first letter of the word
        followed by digits for the remaining letters, truncated to the
        configured maximum length and, if zero-padding is enabled, padded
        with 0s up to that length. A word that yields no code at all is
        encoded as '0' when zero-padding is disabled.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Phonex value

        Examples
        --------
        >>> pe = Phonex()
        >>> pe.encode('Christopher')
        'C623'
        >>> pe.encode('Niall')
        'N400'
        >>> pe.encode('Schmidt')
        'S253'
        >>> pe.encode('Smith')
        'S530'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        name = self._phonex_prepare(word)

        # The code starts with the first letter itself (not a digit)
        name_code = last = name[:1]

        # Modified Soundex code
        for i in range(1, len(name)):
            # A D or G directly after M/N is deleted by overwriting it with
            # the preceding letter: the copy receives the same digit and is
            # then dropped as a duplicate. The length of name is unchanged.
            if name[i] in {'M', 'N'} and name[i + 1 : i + 2] in {'D', 'G'}:
                name = name[: i + 1] + name[i] + name[i + 2 :]

            code = self._phonex_code_at(name, i)
            if code != '0' and code != last:
                name_code += code

            # Duplicates are detected against the last emitted character,
            # so unencoded letters do not separate two equal digits.
            last = name_code[-1]

        if self._zero_pad:
            name_code += '0' * self._max_length
        if not name_code:
            name_code = '0'
        return name_code[: self._max_length]
204 1
205 1
206
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    import doctest

    doctest.testmod()
210