Completed
Branch master (78a222)
by Chris
14:36
created

abydos.phonetic._caverphone   A

Complexity

Total Complexity 36

Size/Duplication

Total Lines 207
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 36
eloc 133
dl 0
loc 207
ccs 82
cts 82
cp 1
rs 9.52
c 0
b 0
f 0

1 Function

Rating   Name   Duplication   Size   Complexity  
F caverphone() 0 172 36
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._caverphone.
20
21
The phonetic._caverphone module implements the Caverphone phonetic algorithm.
22
"""
23
24 1
from __future__ import unicode_literals
25
26 1
__all__ = ['caverphone']
27
28
29 1
def caverphone(word, version=2):
    """Return the Caverphone code for a word.

    A description of version 1 of the algorithm can be found in
    :cite:`Hood:2002`.

    A description of version 2 of the algorithm can be found in
    :cite:`Hood:2004`.

    The word is lower-cased and every character other than the letters
    ``a``-``z`` is discarded before encoding. While the rules are applied,
    lower-case letters are still unprocessed input, upper-case letters are
    finished code letters, ``'2'`` marks a position that is dropped at the
    end, and ``'3'`` marks a vowel position (also dropped at the end, except
    that version 2 turns a trailing ``'3'`` into ``'A'``).

    :param str word: the word to transform
    :param int version: the version of Caverphone to employ for encoding
        (defaults to 2); any value other than 1 selects the version 2 rules
    :returns: the Caverphone value, right-padded with ``'1'`` and cut to
        10 characters (6 characters for version 1)
    :rtype: str

    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'
    """
    _vowels = {'a', 'e', 'i', 'o', 'u'}
    _letters = frozenset('abcdefghijklmnopqrstuvwxyz')

    # Word-initial "-ough" spellings whose "gh" sounds like "f".
    _ough_prefixes = ('cough', 'rough', 'tough', 'enough')

    # Ordered substitutions; later rules rely on the output of earlier ones
    # (e.g. 'ci' -> 'si' must run before the catch-all 'c' -> 'k').
    _substitutions = (
        ('cq', '2q'),
        ('ci', 'si'),
        ('ce', 'se'),
        ('cy', 'sy'),
        ('tch', '2ch'),
        ('c', 'k'),
        ('q', 'k'),
        ('x', 'k'),
        ('v', 'f'),
        ('dg', '2g'),
        ('tio', 'sio'),
        ('tia', 'sia'),
        ('d', 't'),
        ('ph', 'fh'),
        ('b', 'p'),
        ('sh', 's2'),
        ('z', 's'),
    )

    def _squeeze_replace(word, char):
        """Collapse each run of char in word to one upper-cased char."""
        while char * 2 in word:
            word = word.replace(char * 2, char)
        return word.replace(char, char.upper())

    # Normalise: lower-case, then keep only the letters a-z.
    word = ''.join(c for c in word.lower() if c in _letters)

    # the main replacement algorithm
    if version != 1 and word[-1:] == 'e':
        word = word[:-1]
    if word:
        # Version 2 additionally recognises "trough".
        if version != 1:
            ough_prefixes = _ough_prefixes + ('trough',)
        else:
            ough_prefixes = _ough_prefixes
        for prefix in ough_prefixes:
            if word.startswith(prefix):
                # e.g. 'cough...' -> 'cou2f...'; the prefixes are mutually
                # exclusive, so at most one can ever apply.
                word = prefix[:-2] + '2f' + word[len(prefix):]
                break
        if word[:2] == 'gn':
            word = '2n' + word[2:]
        if word[-2:] == 'mb':
            word = word[:-1] + '2'
        for src, tar in _substitutions:
            word = word.replace(src, tar)

        # A leading vowel is kept as 'A'; every other vowel becomes '3'.
        if word[0] in _vowels:
            word = 'A' + word[1:]
        for vowel in 'aeiou':
            word = word.replace(vowel, '3')

        if version != 1:
            # Version 2 resolves j/y before the consonant rules.
            word = word.replace('j', 'y')
            if word[:2] == 'y3':
                word = 'Y3' + word[2:]
            if word[:1] == 'y':
                word = 'A' + word[1:]
            word = word.replace('y', '3')
        for src, tar in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
            word = word.replace(src, tar)

        for char in 'stpkfmn':
            word = _squeeze_replace(word, char)

        # w, h, r and l survive only in certain contexts (mostly before a
        # vowel); otherwise they become the drop marker '2'.
        word = word.replace('w3', 'W3')
        if version == 1:
            word = word.replace('wy', 'Wy')
        word = word.replace('wh3', 'Wh3')
        if version == 1:
            word = word.replace('why', 'Why')
        if version != 1 and word[-1:] == 'w':
            word = word[:-1] + '3'
        word = word.replace('w', '2')

        if word[:1] == 'h':
            word = 'A' + word[1:]
        word = word.replace('h', '2')

        word = word.replace('r3', 'R3')
        if version == 1:
            word = word.replace('ry', 'Ry')
        if version != 1 and word[-1:] == 'r':
            word = word[:-1] + '3'
        word = word.replace('r', '2')

        word = word.replace('l3', 'L3')
        if version == 1:
            word = word.replace('ly', 'Ly')
        if version != 1 and word[-1:] == 'l':
            word = word[:-1] + '3'
        word = word.replace('l', '2')

        if version == 1:
            # Version 1 resolves j/y only after the w/h/r/l rules.
            word = word.replace('j', 'y')
            word = word.replace('y3', 'Y3')
            word = word.replace('y', '2')

        # Drop the markers; version 2 keeps a trailing vowel as 'A'.
        word = word.replace('2', '')
        if version != 1 and word[-1:] == '3':
            word = word[:-1] + 'A'
        word = word.replace('3', '')

    # pad with 1s, then extract the necessary length of code
    word += '1' * 10
    if version != 1:
        word = word[:10]
    else:
        word = word[:6]

    return word
201
202
203
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    import doctest

    doctest.testmod()
207