Completed
Pull Request — master (#225)
by Chris
09:15
created

abydos.phonetic._phonic   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 203
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 9
eloc 74
dl 0
loc 203
ccs 43
cts 43
cp 1
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
B PHONIC.encode() 0 55 6
A PHONIC.encode_alpha() 0 37 1
A PHONIC.__init__() 0 26 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._phonic.
20
21
Phonic
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._phonetic import _Phonetic
32
33 1
__all__ = ['PHONIC']
34
35
36 1
class PHONIC(_Phonetic):
    """PHONIC code.

    PHONIC is a Soundex-like algorithm defined in :cite:`Taft:1970`.

    .. versionadded:: 0.4.1
    """

    # Two-letter sequences that map to a single code digit. These are looked
    # up before the single-letter table in encode().
    _trans2 = {
        'CH': '6',
        'SH': '6',
        'PH': '8',
        'CE': '0',
        'CI': '0',
        'CY': '0',
    }

    # Single-letter to code-digit mapping. Characters absent from this table
    # (and not starting a _trans2 pair) contribute no digit to the code.
    _trans1 = {
        'D': '1',
        'T': '1',
        'N': '2',
        'M': '3',
        'R': '4',
        'L': '5',
        'J': '6',
        'C': '7',
        'K': '7',
        'G': '7',
        'Q': '7',
        'X': '7',
        'F': '8',
        'V': '8',
        'B': '9',
        'P': '9',
        'S': '0',
        'Z': '0',
    }

    # Translation table (code point of digit -> letter) applied by
    # encode_alpha() to turn a numeric code into its alphabetic form.
    _alphabetic = {
        ord(digit): letter
        for digit, letter in zip('0123456789', 'STNMRLJKFP')
    }

    def __init__(self, max_length=5, zero_pad=True, extended=False):
        """Initialize PHONIC instance.

        Parameters
        ----------
        max_length : int
            The length of the code returned (defaults to 5). Values are
            clamped to the range 5 to 64; -1 selects the maximum of 64.
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string
        extended : bool
            If True, this uses Taft's 'Extended PHONIC coding' mode, which
            simply omits the first character of the code.


        .. versionadded:: 0.4.1

        """
        # Require a max_length of at least 5 and not more than 64
        if max_length != -1:
            self._max_length = min(max(5, max_length), 64)
        else:
            self._max_length = 64

        self._zero_pad = zero_pad
        self._extended = extended

    def encode_alpha(self, word):
        """Return the alphabetic PHONIC code for a word.

        The word is encoded in extended mode without zero padding, whatever
        the instance's own settings are, and the resulting digits are then
        mapped to letters.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic PHONIC value

        Examples
        --------
        >>> pe = PHONIC()
        >>> pe.encode_alpha('Christopher')
        'JRSTF'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SMT'
        >>> pe.encode_alpha('Schmidt')
        'SJMT'


        .. versionadded:: 0.4.1

        """
        save_pad = self._zero_pad
        save_ext = self._extended
        self._zero_pad = False
        self._extended = True
        try:
            code = self.encode(word)
        finally:
            # Always restore the caller's settings, even if encode() raises,
            # so a failed call cannot leave the instance misconfigured.
            self._zero_pad = save_pad
            self._extended = save_ext
        return code.translate(self._alphabetic)

    def encode(self, word):
        """Return the PHONIC code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The PHONIC code, at most max_length characters long. Unless
            extended mode is on, it starts with the first character of the
            uppercased word.

        Examples
        --------
        >>> pe = PHONIC()
        >>> pe.encode('Christopher')
        'C6401'
        >>> pe.encode('Niall')
        'N2500'
        >>> pe.encode('Smith')
        'S0310'
        >>> pe.encode('Schmidt')
        'S0631'


        .. versionadded:: 0.4.1

        """
        # uppercase
        word = word.upper()

        code = []
        pos = 0
        while pos < len(word):
            digraph = word[pos : pos + 2]
            if digraph in self._trans2:
                code.append(self._trans2[digraph])
                # A two-letter match consumes one extra character.
                pos += 1
            elif word[pos] in self._trans1:
                code.append(self._trans1[word[pos]])
            else:
                # Placeholder for an uncoded character; it stays in place
                # until after repeat removal and is stripped afterwards.
                code.append('.')
            pos += 1

        code = ''.join(code)
        code = self._delete_consecutive_repeats(code)
        code = code.replace('.', '')

        if self._zero_pad:
            # Pad the digits to max_length - 1; the remaining position is
            # for the initial letter added below in non-extended mode.
            code += '0' * (self._max_length - 1 - len(code))

        if not self._extended:
            code = word[:1] + code

        return code[: self._max_length]
197
198
199
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the
    # docstrings of this module.
    import doctest

    doctest.testmod()
203