abydos.phonetic._metaphone.Metaphone.__init__()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 18
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 18
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 2
nop 2
crap 2
# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._metaphone.

Metaphone
"""
21
22
from ._phonetic import _Phonetic
23
24 1
# Public names exported by this module.
__all__ = ['Metaphone']
25
26
27
class Metaphone(_Phonetic):
    """Metaphone.

    Based on Lawrence Philips' Pick BASIC code from 1990 :cite:`Philips:1990`,
    as described in :cite:`Philips:1990b`.
    This incorporates some corrections to the above code, particularly
    some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`.

    .. versionadded:: 0.3.6
    """

    # Front vowels: when one follows C or G, the consonant is softened
    # (C -> 'S', G -> 'J') in encode().
    _frontv = {'E', 'I', 'Y'}
    # As in variable sound--those modified by adding an "h"; an H directly
    # after one of these letters is not encoded on its own.
    _varson = {'C', 'G', 'P', 'S', 'T'}

    def __init__(self, max_length: int = -1) -> None:
        """Initialize Metaphone instance.

        Parameters
        ----------
        max_length : int
            The maximum length of the returned Metaphone code. The default
            of -1 selects a maximum of 64; any other value is raised to at
            least 4 (in Philips' original implementation this was 4).


        .. versionadded:: 0.4.0

        """
        # Require a max_length of at least 4; -1 is the "use the default"
        # sentinel and maps to 64.
        if max_length != -1:
            self._max_length = max(4, max_length)
        else:
            self._max_length = 64

    def encode(self, word: str) -> str:
        """Return the Metaphone code for a word.

        Based on Lawrence Philips' Pick BASIC code from 1990
        :cite:`Philips:1990`, as described in :cite:`Philips:1990b`.
        This incorporates some corrections to the above code, particularly
        some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Metaphone value, at most ``max_length`` characters long
            (an empty string if the word has no alphanumeric characters)

        Examples
        --------
        >>> pe = Metaphone()
        >>> pe.encode('Christopher')
        'KRSTFR'
        >>> pe.encode('Niall')
        'NL'
        >>> pe.encode('Smith')
        'SM0'
        >>> pe.encode('Schmidt')
        'SKMTT'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # NOTE: self._uc_v_set is inherited from _Phonetic (not defined in
        # this file); it is presumably the set of uppercase vowels --
        # confirm against the base class.

        # Delete non-alphanumeric characters and make all caps
        ename = ''.join(c for c in word.upper() if c.isalnum())

        if not ename:
            return ''

        # Normalize word-initial letter pairs
        if ename[0:2] in {'PN', 'AE', 'KN', 'GN', 'WR'}:
            # Drop the first letter of these pairs
            ename = ename[1:]
        elif ename[0] == 'X':
            # Initial X is encoded as S
            ename = 'S' + ename[1:]
        elif ename[0:2] == 'WH':
            # Initial WH is encoded as W
            ename = 'W' + ename[2:]

        # Convert to metaphone
        elen = len(ename) - 1  # index of the last character
        metaph = ''
        for i in range(len(ename)):
            if len(metaph) >= self._max_length:
                break

            # Skip the second of a doubled letter (G and T doubles are
            # handled in their own branches below)
            if (
                ename[i] not in {'G', 'T'}
                and i > 0
                and ename[i - 1] == ename[i]
            ):
                continue

            # A vowel is only encoded when it is the first letter; vowels
            # (and digits) elsewhere match no branch and add nothing
            if ename[i] in self._uc_v_set and i == 0:
                metaph = ename[i]

            elif ename[i] == 'B':
                # B is skipped only as the final letter after M ("-MB")
                if i != elen or ename[i - 1] != 'M':
                    metaph += ename[i]

            elif ename[i] == 'C':
                # C adds nothing in SCE/SCI/SCY
                if not (
                    i > 0
                    and ename[i - 1] == 'S'
                    and ename[i + 1 : i + 2] in self._frontv
                ):
                    if ename[i + 1 : i + 3] == 'IA':
                        metaph += 'X'
                    elif ename[i + 1 : i + 2] in self._frontv:
                        metaph += 'S'
                    elif i > 0 and ename[i - 1 : i + 2] == 'SCH':
                        metaph += 'K'
                    elif ename[i + 1 : i + 2] == 'H':
                        # Word-initial CH before a non-vowel is 'K',
                        # any other CH is 'X'
                        if (
                            i == 0
                            and i + 1 < elen
                            and ename[i + 2 : i + 3] not in self._uc_v_set
                        ):
                            metaph += 'K'
                        else:
                            metaph += 'X'
                    else:
                        metaph += 'K'

            elif ename[i] == 'D':
                # DGE/DGI/DGY -> 'J', otherwise D -> 'T'
                if (
                    ename[i + 1 : i + 2] == 'G'
                    and ename[i + 2 : i + 3] in self._frontv
                ):
                    metaph += 'J'
                else:
                    metaph += 'T'

            elif ename[i] == 'G':
                # GH followed by a vowel: G adds nothing
                if ename[i + 1 : i + 2] == 'H' and not (
                    i + 1 == elen or ename[i + 2 : i + 3] not in self._uc_v_set
                ):
                    continue
                # Word-final GN or GNED: G adds nothing
                elif i > 0 and (
                    (i + 1 == elen and ename[i + 1] == 'N')
                    or (i + 3 == elen and ename[i + 1 : i + 4] == 'NED')
                ):
                    continue
                # G in DGE/DGI/DGY adds nothing (the D branch emitted 'J').
                # NOTE(review): `i - 1 > 0` excludes a word-initial "DG";
                # possibly intended as `i > 0` -- confirm before changing.
                elif (
                    i - 1 > 0
                    and i + 1 <= elen
                    and ename[i - 1] == 'D'
                    and ename[i + 1] in self._frontv
                ):
                    continue
                # First G of GG adds nothing; the second one is encoded
                elif ename[i + 1 : i + 2] == 'G':
                    continue
                elif ename[i + 1 : i + 2] in self._frontv:
                    # Soft G, unless it is the second G of GG
                    if i == 0 or ename[i - 1] != 'G':
                        metaph += 'J'
                    else:
                        metaph += 'K'
                else:
                    metaph += 'K'

            elif ename[i] == 'H':
                # H after a vowel and not before a vowel adds nothing
                if (
                    i > 0
                    and ename[i - 1] in self._uc_v_set
                    and ename[i + 1 : i + 2] not in self._uc_v_set
                ):
                    continue
                # H after C, G, P, S or T adds nothing of its own
                elif i > 0 and ename[i - 1] in self._varson:
                    continue
                else:
                    metaph += 'H'

            elif ename[i] in {'F', 'J', 'L', 'M', 'N', 'R'}:
                metaph += ename[i]

            elif ename[i] == 'K':
                # K after C adds nothing
                if i > 0 and ename[i - 1] == 'C':
                    continue
                else:
                    metaph += 'K'

            elif ename[i] == 'P':
                # PH -> 'F'
                if ename[i + 1 : i + 2] == 'H':
                    metaph += 'F'
                else:
                    metaph += 'P'

            elif ename[i] == 'Q':
                metaph += 'K'

            elif ename[i] == 'S':
                # Non-initial SIO/SIA and any SH -> 'X'
                if (
                    i > 0
                    and i + 2 <= elen
                    and ename[i + 1] == 'I'
                    and ename[i + 2] in 'OA'
                ):
                    metaph += 'X'
                elif ename[i + 1 : i + 2] == 'H':
                    metaph += 'X'
                else:
                    metaph += 'S'

            elif ename[i] == 'T':
                # Non-initial TIA/TIO -> 'X'
                if (
                    i > 0
                    and i + 2 <= elen
                    and ename[i + 1] == 'I'
                    and ename[i + 2] in {'A', 'O'}
                ):
                    metaph += 'X'
                # TH -> '0'
                elif ename[i + 1 : i + 2] == 'H':
                    metaph += '0'
                # T in TCH adds nothing; the second T of TT adds nothing
                elif ename[i + 1 : i + 3] != 'CH':
                    if ename[i - 1 : i] != 'T':
                        metaph += 'T'

            elif ename[i] == 'V':
                metaph += 'F'

            elif ename[i] in 'WY':
                # W and Y are only encoded before a vowel
                if ename[i + 1 : i + 2] in self._uc_v_set:
                    metaph += ename[i]

            elif ename[i] == 'X':
                metaph += 'KS'

            elif ename[i] == 'Z':
                metaph += 'S'

        # X appends two characters at once, so the code built above can be
        # one character longer than the limit; trim to honor max_length.
        return metaph[: self._max_length]
260
261 1
262 1
if __name__ == '__main__':
    # When executed as a script, run the doctest examples found in this
    # module's docstrings.
    import doctest

    doctest.testmod()
266