abydos.phonetic._caverphone - Code Metrics - Inspection of "78a222a9f7d8976f6744d263e3d6d01a2a991c27" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Branch — master (78a222)

by Chris

created 2018-10-26 11:30 UTC

abydos.phonetic._caverphone A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	207
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
wmc	36
eloc	133
dl	0
loc	207
ccs	82
cts	82
cp	1
rs	9.52
c	0
b	0
f	0

1 Function

Rating	Name	Duplication	Size	Complexity
F	caverphone()	0	172	36

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._caverphone.

The phonetic._caverphone module implements the Caverphone phonetic algorithm.
"""

from __future__ import unicode_literals

# Public API of this module: only caverphone() is exported by ``import *``.
__all__ = ['caverphone']


def caverphone(word, version=2):
    """Encode a word with the Caverphone phonetic algorithm.

    Version 1 of the algorithm is described in :cite:`Hood:2002` and
    version 2 in :cite:`Hood:2004`.

    The word is lower-cased and every character other than a-z is dropped
    before the rewrite rules run. While rewriting, upper-case letters are
    symbols of the final code, and the digits ``2`` and ``3`` are temporary
    placeholders (``3`` stands in for vowels); both digits are deleted at
    the end and the code is right-padded with ``1``.

    :param str word: the word to transform
    :param int version: the version of Caverphone to employ for encoding;
        ``1`` yields 6-character version 1 codes, any other value yields
        10-character version 2 codes (defaults to 2)
    :returns: the Caverphone value
    :rtype: str

    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'
    """
    is_v1 = version == 1
    code_length = 6 if is_v1 else 10
    padding = '1' * 10

    # Normalise: lower-case and keep only the 26 basic Latin letters.
    letters = frozenset('abcdefghijklmnopqrstuvwxyz')
    code = ''.join(ch for ch in word.lower() if ch in letters)

    # Version 2 ignores a final 'e'.
    if not is_v1 and code.endswith('e'):
        code = code[:-1]

    # Nothing left to encode: the result is padding only.
    if not code:
        return padding[:code_length]

    # Word-initial "...ough" pronounced "uff": silence the 'g', 'h' -> 'f'.
    # 'trough' is only special-cased by version 2.
    prefixes = ('cough', 'rough', 'tough', 'enough')
    if not is_v1:
        prefixes += ('trough',)
    for prefix in prefixes:
        if code.startswith(prefix):
            code = prefix[:-2] + '2f' + code[len(prefix):]

    # Silent initial 'g' in "gn" and silent final 'b' in "mb".
    if code.startswith('gn'):
        code = '2' + code[1:]
    if code.endswith('mb'):
        code = code[:-1] + '2'

    # Ordered consonant rewrites; the multi-letter contexts must be handled
    # before the single-letter fallbacks that follow them.
    consonant_rules = (
        ('cq', '2q'),
        ('ci', 'si'),
        ('ce', 'se'),
        ('cy', 'sy'),
        ('tch', '2ch'),
        ('c', 'k'),
        ('q', 'k'),
        ('x', 'k'),
        ('v', 'f'),
        ('dg', '2g'),
        ('tio', 'sio'),
        ('tia', 'sia'),
        ('d', 't'),
        ('ph', 'fh'),
        ('b', 'p'),
        ('sh', 's2'),
        ('z', 's'),
    )
    for old, new in consonant_rules:
        code = code.replace(old, new)

    # An initial vowel is kept as 'A'; every other vowel becomes '3'.
    if code[0] in 'aeiou':
        code = 'A' + code[1:]
    code = ''.join('3' if ch in 'aeiou' else ch for ch in code)

    # Version 2 resolves 'j'/'y' here; version 1 defers it to the end.
    if not is_v1:
        code = code.replace('j', 'y')
        if code.startswith('y3'):
            code = 'Y' + code[1:]
        elif code.startswith('y'):
            code = 'A' + code[1:]
        code = code.replace('y', '3')

    for old, new in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
        code = code.replace(old, new)

    # Collapse runs of each of these consonants to a single upper-case one.
    for consonant in 'stpkfmn':
        doubled = consonant + consonant
        while doubled in code:
            code = code.replace(doubled, consonant)
        code = code.replace(consonant, consonant.upper())

    # 'w': kept before a vowel (optionally via 'h'), otherwise dropped.
    code = code.replace('w3', 'W3')
    if is_v1:
        code = code.replace('wy', 'Wy')
    code = code.replace('wh3', 'Wh3')
    if is_v1:
        code = code.replace('why', 'Why')
    elif code.endswith('w'):
        code = code[:-1] + '3'
    code = code.replace('w', '2')

    # 'h': kept (as 'A') only at the start of the word.
    if code.startswith('h'):
        code = 'A' + code[1:]
    code = code.replace('h', '2')

    # 'r' then 'l': kept before a vowel, otherwise dropped; in version 2 a
    # word-final one turns into a vowel placeholder instead.
    for liquid in 'rl':
        kept = liquid.upper()
        code = code.replace(liquid + '3', kept + '3')
        if is_v1:
            code = code.replace(liquid + 'y', kept + 'y')
        elif code.endswith(liquid):
            code = code[:-1] + '3'
        code = code.replace(liquid, '2')

    # Version 1 handles 'j'/'y' only now, after the rules above saw 'y'.
    if is_v1:
        code = code.replace('j', 'y').replace('y3', 'Y3').replace('y', '2')

    # Drop the placeholders; version 2 keeps a final vowel as 'A'.
    code = code.replace('2', '')
    if not is_v1 and code.endswith('3'):
        code = code[:-1] + 'A'
    code = code.replace('3', '')

    # Pad with 1s, then cut to the fixed code length.
    return (code + padding)[:code_length]


if __name__ == '__main__':
    # Executed as a script: run the docstring examples as a self-test.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._caverphone.
20
21		The phonetic._caverphone module implements the Caverphone phonetic algorithm.
22		"""
23
24	1	from __future__ import unicode_literals
25
26	1	__all__ = ['caverphone']
27
28
29	1	def caverphone(word, version=2):
30		"""Return the Caverphone code for a word.
31
32		A description of version 1 of the algorithm can be found in
33		:cite:`Hood:2002`.
34
35		A description of version 2 of the algorithm can be found in
36		:cite:`Hood:2004`.
37
38		:param str word: the word to transform
39		:param int version: the version of Caverphone to employ for encoding
40		(defaults to 2)
41		:returns: the Caverphone value
42		:rtype: str
43
44		>>> caverphone('Christopher')
45		'KRSTFA1111'
46		>>> caverphone('Niall')
47		'NA11111111'
48		>>> caverphone('Smith')
49		'SMT1111111'
50		>>> caverphone('Schmidt')
51		'SKMT111111'
52
53		>>> caverphone('Christopher', 1)
54		'KRSTF1'
55		>>> caverphone('Niall', 1)
56		'N11111'
57		>>> caverphone('Smith', 1)
58		'SMT111'
59		>>> caverphone('Schmidt', 1)
60		'SKMT11'
61		"""
62	1	_vowels = {'a', 'e', 'i', 'o', 'u'}
63
64	1	word = word.lower()
65	1	word = ''.join(
66		c
67		for c in word
68		if c
69		in {
70		'a',
71		'b',
72		'c',
73		'd',
74		'e',
75		'f',
76		'g',
77		'h',
78		'i',
79		'j',
80		'k',
81		'l',
82		'm',
83		'n',
84		'o',
85		'p',
86		'q',
87		'r',
88		's',
89		't',
90		'u',
91		'v',
92		'w',
93		'x',
94		'y',
95		'z',
96		}
97		)
98
99	1	def _squeeze_replace(word, char):
100		"""Convert strings of char in word to one instance of new_char."""
101	1	while char * 2 in word:
102	1	word = word.replace(char * 2, char)
103	1	return word.replace(char, char.upper())
104
105		# the main replacement algorithm
106	1	if version != 1 and word[-1:] == 'e':
107	1	word = word[:-1]
108	1	if word:
109	1	if word[:5] == 'cough':
110	1	word = 'cou2f' + word[5:]
111	1	if word[:5] == 'rough':
112	1	word = 'rou2f' + word[5:]
113	1	if word[:5] == 'tough':
114	1	word = 'tou2f' + word[5:]
115	1	if word[:6] == 'enough':
116	1	word = 'enou2f' + word[6:]
117	1	if version != 1 and word[:6] == 'trough':
118	1	word = 'trou2f' + word[6:]
119	1	if word[:2] == 'gn':
120	1	word = '2n' + word[2:]
121	1	if word[-2:] == 'mb':
122	1	word = word[:-1] + '2'
123	1	for src, tar in (
124		('cq', '2q'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
125		('ci', 'si'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
126		('ce', 'se'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
127		('cy', 'sy'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
128		('tch', '2ch'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
129		('c', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
130		('q', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
131		('x', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
132		('v', 'f'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
133		('dg', '2g'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
134		('tio', 'sio'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
135		('tia', 'sia'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
136		('d', 't'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
137		('ph', 'fh'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
138		('b', 'p'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
139		('sh', 's2'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
140		('z', 's'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
141		):
142	1	word = word.replace(src, tar)
143	1	if word[0] in _vowels:
144	1	word = 'A' + word[1:]
145	1	for vowel in 'aeiou':
146	1	word = word.replace(vowel, '3')
147	1	if version != 1:
148	1	word = word.replace('j', 'y')
149	1	if word[:2] == 'y3':
150	1	word = 'Y3' + word[2:]
151	1	if word[:1] == 'y':
152	1	word = 'A' + word[1:]
153	1	word = word.replace('y', '3')
154	1	for src, tar in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
155	1	word = word.replace(src, tar)
156
157	1	for char in 'stpkfmn':
158	1	word = _squeeze_replace(word, char)
159
160	1	word = word.replace('w3', 'W3')
161	1	if version == 1:
162	1	word = word.replace('wy', 'Wy')
163	1	word = word.replace('wh3', 'Wh3')
164	1	if version == 1:
165	1	word = word.replace('why', 'Why')
166	1	if version != 1 and word[-1:] == 'w':
167	1	word = word[:-1] + '3'
168	1	word = word.replace('w', '2')
169	1	if word[:1] == 'h':
170	1	word = 'A' + word[1:]
171	1	word = word.replace('h', '2')
172	1	word = word.replace('r3', 'R3')
173	1	if version == 1:
174	1	word = word.replace('ry', 'Ry')
175	1	if version != 1 and word[-1:] == 'r':
176	1	word = word[:-1] + '3'
177	1	word = word.replace('r', '2')
178	1	word = word.replace('l3', 'L3')
179	1	if version == 1:
180	1	word = word.replace('ly', 'Ly')
181	1	if version != 1 and word[-1:] == 'l':
182	1	word = word[:-1] + '3'
183	1	word = word.replace('l', '2')
184	1	if version == 1:
185	1	word = word.replace('j', 'y')
186	1	word = word.replace('y3', 'Y3')
187	1	word = word.replace('y', '2')
188	1	word = word.replace('2', '')
189	1	if version != 1 and word[-1:] == '3':
190	1	word = word[:-1] + 'A'
191	1	word = word.replace('3', '')
192
193		# pad with 1s, then extract the necessary length of code
194	1	word += '1' * 10
195	1	if version != 1:
196	1	word = word[:10]
197		else:
198	1	word = word[:6]
199
200	1	return word
201
202
203		if __name__ == '__main__':
204		import doctest
205
206		doctest.testmod()
207

chrislit / abydos

Branch — master (78a222)

abydos.phonetic._caverphone A

Complexity

Size/Duplication

Test Coverage

Importance

1 Function

Duplication Side-by-Side

Filter issues like