abydos.phonetic._caverphone.caverphone() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 03:25 UTC

abydos.phonetic._caverphone.caverphone() A

↳ Parent: abydos.phonetic._caverphone

Complexity

Conditions

Size

Total Lines	34
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
eloc	2
dl	0
loc	34
ccs	2
cts	2
cp	1
rs	10
c	0
b	0
f	0
cc	1
nop	2
crap	1

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._caverphone.

Caverphone phonetic algorithm
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._phonetic import Phonetic

__all__ = ['Caverphone', 'caverphone']


class Caverphone(Phonetic):
    """Caverphone phonetic encoder.

    Both published revisions of the algorithm are supported: version 1 is
    described in :cite:`Hood:2002` and version 2 in :cite:`Hood:2004`.
    """

    def encode(self, word, version=2):
        """Encode a word as its Caverphone key.

        The word is lowercased and filtered against ``self._lc_set``
        (inherited from the base class; presumably the lowercase letters
        a-z -- confirm there). It is then rewritten by an ordered series of
        substitutions. Within the working string, '3' stands in for vowels
        and '2' for letters that are to be dropped; both markers are removed
        at the end. The result is padded with '1' and cut to 10 characters,
        or to 6 characters when ``version`` is 1.

        Args:
            word (str): The word to transform
            version (int): Which Caverphone revision to apply; 1 selects
                the original rules, any other value selects version 2
                (defaults to 2)

        Returns:
            str: The Caverphone value

        Examples:
            >>> pe = Caverphone()
            >>> pe.encode('Christopher')
            'KRSTFA1111'
            >>> pe.encode('Niall')
            'NA11111111'
            >>> pe.encode('Smith')
            'SMT1111111'
            >>> pe.encode('Schmidt')
            'SKMT111111'

            >>> pe.encode('Christopher', 1)
            'KRSTF1'
            >>> pe.encode('Niall', 1)
            'N11111'
            >>> pe.encode('Smith', 1)
            'SMT111'
            >>> pe.encode('Schmidt', 1)
            'SKMT11'

        """
        legacy = version == 1
        alphabet = self._lc_set
        code = ''.join(ch for ch in word.lower() if ch in alphabet)

        # version 2 discards a single trailing 'e' before anything else
        if not legacy and code.endswith('e'):
            code = code[:-1]

        if code:
            # special-cased '-ough' openings: their 'gh' becomes '2f'
            ough_stems = ['cough', 'rough', 'tough', 'enough']
            if not legacy:
                ough_stems.append('trough')
            for stem in ough_stems:
                if code.startswith(stem):
                    code = stem[:-2] + '2f' + code[len(stem):]

            if code.startswith('gn'):
                code = '2n' + code[2:]
            if code.endswith('mb'):
                code = code[:-1] + '2'

            # order matters: digraph/trigraph rules must run before the
            # single-letter rules that would otherwise consume them
            consonant_rules = (
                ('cq', '2q'), ('ci', 'si'), ('ce', 'se'), ('cy', 'sy'),
                ('tch', '2ch'), ('c', 'k'), ('q', 'k'), ('x', 'k'),
                ('v', 'f'), ('dg', '2g'), ('tio', 'sio'), ('tia', 'sia'),
                ('d', 't'), ('ph', 'fh'), ('b', 'p'), ('sh', 's2'),
                ('z', 's'),
            )
            for old, new in consonant_rules:
                code = code.replace(old, new)

            # a leading vowel is kept as 'A'; every other vowel becomes '3'
            if code[0] in self._lc_v_set:
                code = 'A' + code[1:]
            code = ''.join('3' if ch in 'aeiou' else ch for ch in code)

            if not legacy:
                code = code.replace('j', 'y')
                if code.startswith('y3'):
                    code = 'Y3' + code[2:]
                if code.startswith('y'):
                    code = 'A' + code[1:]
                code = code.replace('y', '3')

            code = (
                code.replace('3gh3', '3kh3')
                .replace('gh', '22')
                .replace('g', 'k')
            )

            # collapse runs of each of these letters to one, uppercased
            for letter in 'stpkfmn':
                doubled = letter * 2
                while doubled in code:
                    code = code.replace(doubled, letter)
                code = code.replace(letter, letter.upper())

            # 'w': kept (uppercased) before a vowel marker, else dropped
            code = code.replace('w3', 'W3')
            if legacy:
                code = code.replace('wy', 'Wy')
            code = code.replace('wh3', 'Wh3')
            if legacy:
                code = code.replace('why', 'Why')
            elif code.endswith('w'):
                code = code[:-1] + '3'
            code = code.replace('w', '2')

            # 'h': a leading one becomes 'A', the rest are dropped
            if code.startswith('h'):
                code = 'A' + code[1:]
            code = code.replace('h', '2')

            # 'r' and then 'l' follow the same pattern as each other
            for liquid in 'rl':
                kept = liquid.upper()
                code = code.replace(liquid + '3', kept + '3')
                if legacy:
                    code = code.replace(liquid + 'y', kept + 'y')
                elif code.endswith(liquid):
                    code = code[:-1] + '3'
                code = code.replace(liquid, '2')

            # version 1 resolves 'j'/'y' only at this late stage
            if legacy:
                code = (
                    code.replace('j', 'y')
                    .replace('y3', 'Y3')
                    .replace('y', '2')
                )

            # strip the markers; version 2 keeps a final vowel as 'A'
            code = code.replace('2', '')
            if not legacy and code.endswith('3'):
                code = code[:-1] + 'A'
            code = code.replace('3', '')

        # pad with 1s, then cut to the length the chosen version requires
        length = 6 if legacy else 10
        return (code + '1' * 10)[:length]


def caverphone(word, version=2):
    """Compute the Caverphone code of a word.

    Convenience function: it builds a :py:class:`Caverphone` instance and
    delegates to :py:meth:`Caverphone.encode`.

    Args:
        word (str): The word to transform
        version (int): The version of Caverphone to employ for encoding
            (defaults to 2)

    Returns:
        str: The Caverphone value

    Examples:
        >>> caverphone('Christopher')
        'KRSTFA1111'
        >>> caverphone('Niall')
        'NA11111111'
        >>> caverphone('Smith')
        'SMT1111111'
        >>> caverphone('Schmidt')
        'SKMT111111'

        >>> caverphone('Christopher', 1)
        'KRSTF1'
        >>> caverphone('Niall', 1)
        'N11111'
        >>> caverphone('Smith', 1)
        'SMT111'
        >>> caverphone('Schmidt', 1)
        'SKMT11'

    """
    encoder = Caverphone()
    return encoder.encode(word, version)


if __name__ == '__main__':
    # Run this module's docstring examples when executed as a script.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._caverphone.
20
21		Caverphone phonetic algorithm
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._phonetic import Phonetic
32
33	1	__all__ = ['Caverphone', 'caverphone']
34
35
36	1	class Caverphone(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
37		"""Caverphone.
38
39		A description of version 1 of the algorithm can be found in
40		:cite:`Hood:2002`.
41
42		A description of version 2 of the algorithm can be found in
43		:cite:`Hood:2004`.
44		"""
45
46	1	def encode(self, word, version=2):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
47		"""Return the Caverphone code for a word.
48
49		Args:
50		word (str): The word to transform
51		version (int): The version of Caverphone to employ for encoding
52		(defaults to 2)
53
54		Returns:
55		str: The Caverphone value
56
57		Examples:
58		>>> pe = Caverphone()
59		>>> pe.encode('Christopher')
60		'KRSTFA1111'
61		>>> pe.encode('Niall')
62		'NA11111111'
63		>>> pe.encode('Smith')
64		'SMT1111111'
65		>>> pe.encode('Schmidt')
66		'SKMT111111'
67
68		>>> pe.encode('Christopher', 1)
69		'KRSTF1'
70		>>> pe.encode('Niall', 1)
71		'N11111'
72		>>> pe.encode('Smith', 1)
73		'SMT111'
74		>>> pe.encode('Schmidt', 1)
75		'SKMT11'
76
77		"""
78	1	word = word.lower()
79	1	word = ''.join(c for c in word if c in self._lc_set)
80
81	1	def _squeeze_replace(word, char):
82		"""Convert strings of char in word to one instance.
83
84		Args:
85		word (str): The partially converted word
86		char (str): A character to 'squeeze'
87
88		Returns:
89		str: The word with instances of char squeezed down to one
90
91		"""
92	1	while char * 2 in word:
93	1	word = word.replace(char * 2, char)
94	1	return word.replace(char, char.upper())
95
96		# the main replacement algorithm
97	1	if version != 1 and word[-1:] == 'e':
98	1	word = word[:-1]
99	1	if word:
100	1	if word[:5] == 'cough':
101	1	word = 'cou2f' + word[5:]
102	1	if word[:5] == 'rough':
103	1	word = 'rou2f' + word[5:]
104	1	if word[:5] == 'tough':
105	1	word = 'tou2f' + word[5:]
106	1	if word[:6] == 'enough':
107	1	word = 'enou2f' + word[6:]
108	1	if version != 1 and word[:6] == 'trough':
109	1	word = 'trou2f' + word[6:]
110	1	if word[:2] == 'gn':
111	1	word = '2n' + word[2:]
112	1	if word[-2:] == 'mb':
113	1	word = word[:-1] + '2'
114	1	for src, tar in (
115		('cq', '2q'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
116		('ci', 'si'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
117		('ce', 'se'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
118		('cy', 'sy'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
119		('tch', '2ch'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
120		('c', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
121		('q', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122		('x', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123		('v', 'f'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124		('dg', '2g'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
125		('tio', 'sio'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
126		('tia', 'sia'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
127		('d', 't'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
128		('ph', 'fh'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
129		('b', 'p'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
130		('sh', 's2'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
131		('z', 's'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
132		):
133	1	word = word.replace(src, tar)
134	1	if word[0] in self._lc_v_set:
135	1	word = 'A' + word[1:]
136	1	for vowel in 'aeiou':
137	1	word = word.replace(vowel, '3')
138	1	if version != 1:
139	1	word = word.replace('j', 'y')
140	1	if word[:2] == 'y3':
141	1	word = 'Y3' + word[2:]
142	1	if word[:1] == 'y':
143	1	word = 'A' + word[1:]
144	1	word = word.replace('y', '3')
145	1	for src, tar in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
146	1	word = word.replace(src, tar)
147
148	1	for char in 'stpkfmn':
149	1	word = _squeeze_replace(word, char)
150
151	1	word = word.replace('w3', 'W3')
152	1	if version == 1:
153	1	word = word.replace('wy', 'Wy')
154	1	word = word.replace('wh3', 'Wh3')
155	1	if version == 1:
156	1	word = word.replace('why', 'Why')
157	1	if version != 1 and word[-1:] == 'w':
158	1	word = word[:-1] + '3'
159	1	word = word.replace('w', '2')
160	1	if word[:1] == 'h':
161	1	word = 'A' + word[1:]
162	1	word = word.replace('h', '2')
163	1	word = word.replace('r3', 'R3')
164	1	if version == 1:
165	1	word = word.replace('ry', 'Ry')
166	1	if version != 1 and word[-1:] == 'r':
167	1	word = word[:-1] + '3'
168	1	word = word.replace('r', '2')
169	1	word = word.replace('l3', 'L3')
170	1	if version == 1:
171	1	word = word.replace('ly', 'Ly')
172	1	if version != 1 and word[-1:] == 'l':
173	1	word = word[:-1] + '3'
174	1	word = word.replace('l', '2')
175	1	if version == 1:
176	1	word = word.replace('j', 'y')
177	1	word = word.replace('y3', 'Y3')
178	1	word = word.replace('y', '2')
179	1	word = word.replace('2', '')
180	1	if version != 1 and word[-1:] == '3':
181	1	word = word[:-1] + 'A'
182	1	word = word.replace('3', '')
183
184		# pad with 1s, then extract the necessary length of code
185	1	word += '1' * 10
186	1	if version != 1:
187	1	word = word[:10]
188		else:
189	1	word = word[:6]
190
191	1	return word
192
193
194	1	def caverphone(word, version=2):
195		"""Return the Caverphone code for a word.
196
197		This is a wrapper for :py:meth:`Caverphone.encode`.
198
199		Args:
200		word (str): The word to transform
201		version (int): The version of Caverphone to employ for encoding
202		(defaults to 2)
203
204		Returns:
205		str: The Caverphone value
206
207		Examples:
208		>>> caverphone('Christopher')
209		'KRSTFA1111'
210		>>> caverphone('Niall')
211		'NA11111111'
212		>>> caverphone('Smith')
213		'SMT1111111'
214		>>> caverphone('Schmidt')
215		'SKMT111111'
216
217		>>> caverphone('Christopher', 1)
218		'KRSTF1'
219		>>> caverphone('Niall', 1)
220		'N11111'
221		>>> caverphone('Smith', 1)
222		'SMT111'
223		>>> caverphone('Schmidt', 1)
224		'SKMT11'
225
226		"""
227	1	return Caverphone().encode(word, version)
228
229
230		if __name__ == '__main__':
231		import doctest
232
233		doctest.testmod()
234

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._caverphone.caverphone() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like