Test Failed
Push — master ( 64abe2...a464fa )
by Chris
04:02 queued 11s
created

abydos.phonetic.caverphone   A

Complexity

Total Complexity 32

Size/Duplication

Total Lines 183
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 112
dl 0
loc 183
rs 9.84
c 0
b 0
f 0
wmc 32

1 Function

Rating   Name   Duplication   Size   Complexity  
F caverphone() 0 149 32
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.phonetic.caverphone.
20
21
The phonetic.caverphone module implements the Caverphone phonetic algorithm.
22
"""
23
24
from __future__ import unicode_literals
25
26
# Public API: a star-import of this module exposes only caverphone().
__all__ = ['caverphone']
27
28
29
def caverphone(word, version=2):
    """Return the Caverphone code for a word.

    A description of version 1 of the algorithm can be found in
    :cite:`Hood:2002`.

    A description of version 2 of the algorithm can be found in
    :cite:`Hood:2004`.

    The input is lowercased and everything except the letters a-z is
    discarded. While encoding, lowercase letters are input that is still to
    be processed, uppercase letters are finished code symbols, and the
    digits ``2`` and ``3`` are temporary placeholders that are deleted at
    the end. The rules are strictly order-dependent and must not be
    reordered.

    :param str word: the word to transform
    :param int version: the version of Caverphone to employ for encoding
        (defaults to 2); any value other than 1 is treated as version 2
    :returns: the Caverphone value, right-padded with ``1`` and cut to
        6 characters for version 1 or 10 characters otherwise
    :rtype: str

    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'
    """
    _vowels = {'a', 'e', 'i', 'o', 'u'}
    _letters = frozenset('abcdefghijklmnopqrstuvwxyz')
    is_v1 = version == 1

    def _sub(text, *pairs):
        """Apply each (old, new) replacement to text, in the given order."""
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def _squeeze_replace(text, char, new_char):
        """Convert strings of char in text to one instance of new_char."""
        while char * 2 in text:
            text = text.replace(char * 2, char)
        return text.replace(char, new_char)

    # normalize: lowercase, keep only a-z
    word = ''.join(c for c in word.lower() if c in _letters)

    # the main replacement algorithm
    if not is_v1 and word.endswith('e'):
        # version 2 drops a final 'e' before anything else
        word = word[:-1]

    if word:
        # '-ough' prefixes pronounced '-uff': silence the 'g', 'h' -> 'f'.
        # The prefixes are mutually exclusive, so at most one can fire.
        prefixes = ('cough', 'rough', 'tough', 'enough')
        if not is_v1:
            prefixes += ('trough',)
        for prefix in prefixes:
            if word.startswith(prefix):
                word = prefix[:-2] + '2f' + word[len(prefix):]
                break

        if word.startswith('gn'):
            word = '2n' + word[2:]
        if word.endswith('mb'):
            word = word[:-1] + '2'

        word = _sub(
            word,
            ('cq', '2q'),
            ('ci', 'si'),
            ('ce', 'se'),
            ('cy', 'sy'),
            ('tch', '2ch'),
            ('c', 'k'),
            ('q', 'k'),
            ('x', 'k'),
            ('v', 'f'),
            ('dg', '2g'),
            ('tio', 'sio'),
            ('tia', 'sia'),
            ('d', 't'),
            ('ph', 'fh'),
            ('b', 'p'),
            ('sh', 's2'),
            ('z', 's'),
        )

        # an initial vowel is kept as 'A'; every other vowel becomes '3'
        if word[0] in _vowels:
            word = 'A' + word[1:]
        word = _sub(
            word, ('a', '3'), ('e', '3'), ('i', '3'), ('o', '3'), ('u', '3')
        )

        if not is_v1:
            # version 2 resolves j/y here; version 1 does it near the end
            word = word.replace('j', 'y')
            if word.startswith('y3'):
                word = 'Y3' + word[2:]
            if word.startswith('y'):
                word = 'A' + word[1:]
            word = word.replace('y', '3')

        word = _sub(word, ('3gh3', '3kh3'), ('gh', '22'), ('g', 'k'))

        # collapse runs of these consonants into a single final code symbol
        for char in 'stpkfmn':
            word = _squeeze_replace(word, char, char.upper())

        # w
        word = word.replace('w3', 'W3')
        if is_v1:
            word = word.replace('wy', 'Wy')
        word = word.replace('wh3', 'Wh3')
        if is_v1:
            word = word.replace('why', 'Why')
        if not is_v1 and word.endswith('w'):
            word = word[:-1] + '3'
        word = word.replace('w', '2')

        # h
        if word.startswith('h'):
            word = 'A' + word[1:]
        word = word.replace('h', '2')

        # r
        word = word.replace('r3', 'R3')
        if is_v1:
            word = word.replace('ry', 'Ry')
        if not is_v1 and word.endswith('r'):
            word = word[:-1] + '3'
        word = word.replace('r', '2')

        # l
        word = word.replace('l3', 'L3')
        if is_v1:
            word = word.replace('ly', 'Ly')
        if not is_v1 and word.endswith('l'):
            word = word[:-1] + '3'
        word = word.replace('l', '2')

        if is_v1:
            word = _sub(word, ('j', 'y'), ('y3', 'Y3'), ('y', '2'))

        # drop the placeholders; version 2 keeps a final '3' as 'A'
        word = word.replace('2', '')
        if not is_v1 and word.endswith('3'):
            word = word[:-1] + 'A'
        word = word.replace('3', '')

    # pad with 1s, then extract the necessary length of code
    word += '1' * 10
    return word[:6] if is_v1 else word[:10]
178
179
180
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    import doctest
    doctest.testmod()
183