Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._caverphone   A

Complexity

Total Complexity 37

Size/Duplication

Total Lines 249
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 110
dl 0
loc 249
ccs 85
cts 85
cp 1
rs 9.44
c 0
b 0
f 0
wmc 37

1 Method

Rating   Name   Duplication   Size   Complexity  
F Caverphone.encode() 0 156 36

1 Function

Rating   Name   Duplication   Size   Complexity  
A caverphone() 0 39 1
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._caverphone.
20
21
Caverphone phonetic algorithm
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._phonetic import _Phonetic
32
33 1
# Public names of this module: the encoder class and its functional wrapper.
__all__ = ['Caverphone', 'caverphone']
34
35
36 1
class Caverphone(_Phonetic):
    """Caverphone.

    A description of version 1 of the algorithm can be found in
    :cite:`Hood:2002`.

    A description of version 2 of the algorithm can be found in
    :cite:`Hood:2004`.

    While a word is being encoded, the digits ``2`` and ``3`` are used as
    temporary placeholders (``3`` replaces vowels); both are stripped from
    the code before it is padded with ``1`` characters.
    """

    # Word-initial spellings whose 'gh' is rewritten as '2f' by both
    # versions of the algorithm ('trough' is added for version 2 only).
    _ough_prefixes = ('cough', 'rough', 'tough', 'enough')

    # Literal substitutions applied in exactly this order; multi-letter
    # patterns must precede the single letters they contain.
    _consonant_rules = (
        ('cq', '2q'),
        ('ci', 'si'),
        ('ce', 'se'),
        ('cy', 'sy'),
        ('tch', '2ch'),
        ('c', 'k'),
        ('q', 'k'),
        ('x', 'k'),
        ('v', 'f'),
        ('dg', '2g'),
        ('tio', 'sio'),
        ('tia', 'sia'),
        ('d', 't'),
        ('ph', 'fh'),
        ('b', 'p'),
        ('sh', 's2'),
        ('z', 's'),
    )

    # 'g' handling, applied after vowels have become '3'.
    _gh_rules = (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k'))

    # Letters whose runs are collapsed to a single upper-case letter.
    _squeezed_letters = 'stpkfmn'

    def encode(self, word, version=2):
        """Return the Caverphone code for a word.

        Parameters
        ----------
        word : str
            The word to transform
        version : int
            The version of Caverphone to employ for encoding (defaults to 2);
            only the value 1 selects version 1, any other value is treated
            as version 2

        Returns
        -------
        str
            The Caverphone value: 6 characters for version 1, 10 characters
            otherwise, right-padded with ``1``

        Examples
        --------
        >>> pe = Caverphone()
        >>> pe.encode('Christopher')
        'KRSTFA1111'
        >>> pe.encode('Niall')
        'NA11111111'
        >>> pe.encode('Smith')
        'SMT1111111'
        >>> pe.encode('Schmidt')
        'SKMT111111'

        >>> pe.encode('Christopher', 1)
        'KRSTF1'
        >>> pe.encode('Niall', 1)
        'N11111'
        >>> pe.encode('Smith', 1)
        'SMT111'
        >>> pe.encode('Schmidt', 1)
        'SKMT11'

        """
        legacy = version == 1

        # Keep only the lower-case letters known to the base class.
        word = word.lower()
        word = ''.join(c for c in word if c in self._lc_set)

        # Version 2 drops a final 'e' before any other rule is applied.
        if not legacy and word[-1:] == 'e':
            word = word[:-1]

        # An empty word skips every rule and is encoded as padding only.
        if word:
            word = self._rewrite_word_edges(word, legacy)
            word = self._apply_rules(word, self._consonant_rules)
            word = self._encode_vowels(word, legacy)
            word = self._apply_rules(word, self._gh_rules)

            for char in self._squeezed_letters:
                word = self._squeeze(word, char)

            # 'w' is also protected when followed by 'h' ('wh3', 'why').
            word = self._reduce_approximant(word, 'w', legacy, ('', 'h'))

            # An initial 'h' becomes 'A'; any other 'h' is dropped via '2'.
            if word[:1] == 'h':
                word = 'A' + word[1:]
            word = word.replace('h', '2')

            word = self._reduce_approximant(word, 'r', legacy)
            word = self._reduce_approximant(word, 'l', legacy)

            # Version 1 deals with 'j'/'y' only now; version 2 already did
            # so in _encode_vowels().
            if legacy:
                word = word.replace('j', 'y')
                word = word.replace('y3', 'Y3')
                word = word.replace('y', '2')

            # Strip the placeholders; version 2 keeps a final vowel as 'A'.
            word = word.replace('2', '')
            if not legacy and word[-1:] == '3':
                word = word[:-1] + 'A'
            word = word.replace('3', '')

        # Pad with 1s, then extract the necessary length of code.
        code_length = 6 if legacy else 10
        return (word + '1' * 10)[:code_length]

    def _rewrite_word_edges(self, word, legacy):
        """Apply the rules anchored to the start or end of the word.

        Parameters
        ----------
        word : str
            The non-empty, lower-cased word
        legacy : bool
            True when encoding with version 1 of the algorithm

        Returns
        -------
        str
            The word with initial '-ough' spellings, initial 'gn' and final
            'mb' rewritten using the '2' placeholder

        """
        prefixes = self._ough_prefixes
        if not legacy:
            prefixes += ('trough',)

        # The prefixes are mutually exclusive, so at most one can match.
        for prefix in prefixes:
            if word.startswith(prefix):
                # e.g. 'cough...' -> 'cou2f...'
                word = prefix[:-2] + '2f' + word[len(prefix):]

        if word.startswith('gn'):
            word = '2n' + word[2:]
        if word.endswith('mb'):
            word = word[:-1] + '2'
        return word

    @staticmethod
    def _apply_rules(word, rules):
        """Apply an ordered sequence of (source, target) replacements.

        Parameters
        ----------
        word : str
            The partially converted word
        rules : tuple
            Pairs of (source, target) substrings, applied in order

        Returns
        -------
        str
            The word after all replacements

        """
        for src, tar in rules:
            word = word.replace(src, tar)
        return word

    def _encode_vowels(self, word, legacy):
        """Replace vowels (and, for version 2, 'j' and 'y') by placeholders.

        Parameters
        ----------
        word : str
            The non-empty, partially converted word
        legacy : bool
            True when encoding with version 1 of the algorithm

        Returns
        -------
        str
            The word with an initial vowel turned into 'A' and every other
            vowel turned into '3'

        """
        if word[0] in self._lc_v_set:
            word = 'A' + word[1:]
        for vowel in 'aeiou':
            word = word.replace(vowel, '3')

        if not legacy:
            word = word.replace('j', 'y')
            # An initial 'y' survives (as 'Y') only before a vowel;
            # otherwise it is treated like an initial vowel.
            if word[:2] == 'y3':
                word = 'Y3' + word[2:]
            if word[:1] == 'y':
                word = 'A' + word[1:]
            word = word.replace('y', '3')
        return word

    @staticmethod
    def _squeeze(word, char):
        """Convert strings of char in word to one upper-case instance.

        Parameters
        ----------
        word : str
            The partially converted word
        char : str
            A character to 'squeeze'

        Returns
        -------
        str
            The word with runs of char squeezed down to one instance and
            every remaining instance of char upper-cased

        """
        doubled = char * 2
        while doubled in word:
            word = word.replace(doubled, char)
        return word.replace(char, char.upper())

    @staticmethod
    def _reduce_approximant(word, char, legacy, infixes=('',)):
        """Keep char where it precedes a vowel and discard it elsewhere.

        Parameters
        ----------
        word : str
            The partially converted word
        char : str
            The lower-case letter to process ('w', 'r' or 'l')
        legacy : bool
            True when encoding with version 1 of the algorithm
        infixes : tuple
            Strings allowed between char and the following '3' (or, for
            version 1, 'y') without char losing its protection

        Returns
        -------
        str
            The word with protected instances of char upper-cased and all
            others replaced by the '2' placeholder (a final char becomes
            '3' under version 2)

        """
        upper = char.upper()
        for infix in infixes:
            word = word.replace(char + infix + '3', upper + infix + '3')
            if legacy:
                word = word.replace(char + infix + 'y', upper + infix + 'y')
        if not legacy and word[-1:] == char:
            word = word[:-1] + '3'
        return word.replace(char, '2')
202
203
204 1
def caverphone(word, version=2):
    """Return the Caverphone code for a word.

    This is a wrapper for :py:meth:`Caverphone.encode`: it builds a fresh
    :py:class:`Caverphone` instance and delegates to it.

    Parameters
    ----------
    word : str
        The word to transform
    version : int
        The version of Caverphone to employ for encoding (defaults to 2)

    Returns
    -------
    str
        The Caverphone value

    Examples
    --------
    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'

    """
    encoder = Caverphone()
    return encoder.encode(word, version)
243
244
245
if __name__ == '__main__':
    # Executed as a script: run the doctest examples in this module.
    import doctest
    doctest.testmod()
249