abydos.phonetic._caverphone.Caverphone.encode() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-08 03:44 UTC

abydos.phonetic._caverphone.Caverphone.encode() F

↳ Parent: abydos.phonetic._caverphone

Complexity

Conditions

Size

Total Lines	146
Code Lines	95

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	78
CRAP Score	36

Importance

Changes

Metric	Value
eloc	95
dl	0
loc	146
ccs	78
cts	78
cp	1
rs	0
c	0
b	0
f	0
cc	36
nop	3
crap	36

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._caverphone.

The phonetic._caverphone module implements the Caverphone phonetic algorithm.
"""

from __future__ import unicode_literals

from ._phonetic import Phonetic

__all__ = ['Caverphone', 'caverphone']


class Caverphone(Phonetic):
    """Caverphone phonetic encoder.

    Version 1 of the algorithm is described in :cite:`Hood:2002`;
    version 2 is described in :cite:`Hood:2004`.
    """

    def encode(self, word, version=2):
        """Compute the Caverphone code of a word.

        The word is lowercased and filtered against ``self._lc_set``, then
        run through an ordered series of string substitutions. During
        processing, '2' marks a letter that will be dropped and '3' marks a
        vowel position; both are stripped before the result is padded with
        '1' characters and truncated.

        Args:
            word (str): The word to transform
            version (int): The version of Caverphone to employ for encoding
                (defaults to 2); any value other than 1 selects the
                version 2 rules

        Returns:
            str: The Caverphone value (6 characters for version 1,
                10 characters otherwise)

        Examples:
            >>> pe = Caverphone()
            >>> pe.encode('Christopher')
            'KRSTFA1111'
            >>> pe.encode('Niall')
            'NA11111111'
            >>> pe.encode('Smith')
            'SMT1111111'
            >>> pe.encode('Schmidt')
            'SKMT111111'

            >>> pe.encode('Christopher', 1)
            'KRSTF1'
            >>> pe.encode('Niall', 1)
            'N11111'
            >>> pe.encode('Smith', 1)
            'SMT111'
            >>> pe.encode('Schmidt', 1)
            'SKMT11'

        """
        is_v1 = version == 1

        def _collapse_runs(text, letter):
            """Reduce runs of letter to a single uppercase instance.

            Args:
                text (str): The partially converted word
                letter (str): The character whose runs are collapsed

            Returns:
                str: The text with each run of letter replaced by one
                    uppercase copy of it

            """
            doubled = letter + letter
            while doubled in text:
                text = text.replace(doubled, letter)
            return text.replace(letter, letter.upper())

        def _resolve_liquid(text, letter):
            """Apply the shared 'r' / 'l' rules to text.

            The letter is kept (uppercased) when a vowel marker follows it
            (and, for version 1, when 'y' follows it). For version 2 a
            word-final occurrence becomes a vowel marker. Every remaining
            occurrence becomes the drop marker '2'.

            Args:
                text (str): The partially converted word
                letter (str): Either 'r' or 'l'

            Returns:
                str: The text with all lowercase occurrences resolved

            """
            kept = letter.upper()
            text = text.replace(letter + '3', kept + '3')
            if is_v1:
                text = text.replace(letter + 'y', kept + 'y')
            elif text.endswith(letter):
                text = text[:-1] + '3'
            return text.replace(letter, '2')

        # Normalise the input: lowercase, then keep only accepted letters.
        code = ''.join(ch for ch in word.lower() if ch in self._lc_set)

        # Version 2 discards a final 'e' before anything else happens.
        if not is_v1 and code.endswith('e'):
            code = code[:-1]

        if code:
            # Leading "-ough" words: the 'gh' sounds like 'f'.
            ough_prefixes = ('cough', 'rough', 'tough', 'enough')
            if not is_v1:
                ough_prefixes += ('trough',)
            for prefix in ough_prefixes:
                if code.startswith(prefix):
                    code = prefix[:-2] + '2f' + code[len(prefix):]

            # Silent letters at the word boundaries.
            if code.startswith('gn'):
                code = '2n' + code[2:]
            if code.endswith('mb'):
                code = code[:-1] + '2'

            # Ordered consonant rewrites; later rules rely on earlier ones.
            consonant_rules = (
                ('cq', '2q'),
                ('ci', 'si'),
                ('ce', 'se'),
                ('cy', 'sy'),
                ('tch', '2ch'),
                ('c', 'k'),
                ('q', 'k'),
                ('x', 'k'),
                ('v', 'f'),
                ('dg', '2g'),
                ('tio', 'sio'),
                ('tia', 'sia'),
                ('d', 't'),
                ('ph', 'fh'),
                ('b', 'p'),
                ('sh', 's2'),
                ('z', 's'),
            )
            for old, new in consonant_rules:
                code = code.replace(old, new)

            # An initial vowel survives as 'A'; all other vowels become '3'.
            if code[0] in self._lc_v_set:
                code = 'A' + code[1:]
            code = ''.join('3' if ch in 'aeiou' else ch for ch in code)

            # Version 2 resolves 'j' and 'y' at this point.
            if not is_v1:
                code = code.replace('j', 'y')
                if code.startswith('y3'):
                    code = 'Y3' + code[2:]
                if code.startswith('y'):
                    code = 'A' + code[1:]
                code = code.replace('y', '3')

            for old, new in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
                code = code.replace(old, new)

            # These consonants are final: collapse runs and uppercase them.
            for letter in 'stpkfmn':
                code = _collapse_runs(code, letter)

            # 'w' is kept only before a vowel marker (or 'y' in version 1),
            # optionally with an intervening 'h'.
            code = code.replace('w3', 'W3')
            if is_v1:
                code = code.replace('wy', 'Wy')
            code = code.replace('wh3', 'Wh3')
            if is_v1:
                code = code.replace('why', 'Why')
            elif code.endswith('w'):
                code = code[:-1] + '3'
            code = code.replace('w', '2')

            # An initial 'h' becomes 'A'; every other 'h' is dropped.
            if code.startswith('h'):
                code = 'A' + code[1:]
            code = code.replace('h', '2')

            code = _resolve_liquid(code, 'r')
            code = _resolve_liquid(code, 'l')

            # Version 1 resolves 'j' and 'y' only now.
            if is_v1:
                code = code.replace('j', 'y')
                code = code.replace('y3', 'Y3')
                code = code.replace('y', '2')

            # Strip the drop markers; version 2 keeps a final vowel as 'A'.
            code = code.replace('2', '')
            if not is_v1 and code.endswith('3'):
                code = code[:-1] + 'A'
            code = code.replace('3', '')

        # Pad with 1s so the slice always yields a fixed-length code.
        padded = code + '1' * 10
        return padded[:6] if is_v1 else padded[:10]


def caverphone(word, version=2):
    """Compute the Caverphone code of a word.

    Convenience function that builds a :py:class:`Caverphone` instance and
    delegates to :py:meth:`Caverphone.encode`.

    Args:
        word (str): The word to transform
        version (int): The version of Caverphone to employ for encoding
            (defaults to 2)

    Returns:
        str: The Caverphone value

    Examples:
        >>> caverphone('Christopher')
        'KRSTFA1111'
        >>> caverphone('Niall')
        'NA11111111'
        >>> caverphone('Smith')
        'SMT1111111'
        >>> caverphone('Schmidt')
        'SKMT111111'

        >>> caverphone('Christopher', 1)
        'KRSTF1'
        >>> caverphone('Niall', 1)
        'N11111'
        >>> caverphone('Smith', 1)
        'SMT111'
        >>> caverphone('Schmidt', 1)
        'SKMT11'

    """
    encoder = Caverphone()
    return encoder.encode(word, version)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._caverphone.
20
21		The phonetic._caverphone module implements the Caverphone phonetic algorithm.
22		"""
23
24	1	from __future__ import unicode_literals
25
26	1	from ._phonetic import Phonetic
27
28	1	__all__ = ['Caverphone', 'caverphone']
29
30
31	1	class Caverphone(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
32		"""Caverphone.
33
34		A description of version 1 of the algorithm can be found in
35		:cite:`Hood:2002`.
36
37		A description of version 2 of the algorithm can be found in
38		:cite:`Hood:2004`.
39		"""
40
41	1	def encode(self, word, version=2):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
42		"""Return the Caverphone code for a word.
43
44		Args:
45		word (str): The word to transform
46		version (int): The version of Caverphone to employ for encoding
47		(defaults to 2)
48
49		Returns:
50		str: The Caverphone value
51
52		Examples:
53		>>> pe = Caverphone()
54		>>> pe.encode('Christopher')
55		'KRSTFA1111'
56		>>> pe.encode('Niall')
57		'NA11111111'
58		>>> pe.encode('Smith')
59		'SMT1111111'
60		>>> pe.encode('Schmidt')
61		'SKMT111111'
62
63		>>> pe.encode('Christopher', 1)
64		'KRSTF1'
65		>>> pe.encode('Niall', 1)
66		'N11111'
67		>>> pe.encode('Smith', 1)
68		'SMT111'
69		>>> pe.encode('Schmidt', 1)
70		'SKMT11'
71
72		"""
73	1	word = word.lower()
74	1	word = ''.join(c for c in word if c in self._lc_set)
75
76	1	def _squeeze_replace(word, char):
77		"""Convert strings of char in word to one instance.
78
79		Args:
80		word (str): The partially converted word
81		char (str): A character to 'squeeze'
82
83		Returns:
84		str: The word with instances of char squeezed down to one
85
86		"""
87	1	while char * 2 in word:
88	1	word = word.replace(char * 2, char)
89	1	return word.replace(char, char.upper())
90
91		# the main replacement algorithm
92	1	if version != 1 and word[-1:] == 'e':
93	1	word = word[:-1]
94	1	if word:
95	1	if word[:5] == 'cough':
96	1	word = 'cou2f' + word[5:]
97	1	if word[:5] == 'rough':
98	1	word = 'rou2f' + word[5:]
99	1	if word[:5] == 'tough':
100	1	word = 'tou2f' + word[5:]
101	1	if word[:6] == 'enough':
102	1	word = 'enou2f' + word[6:]
103	1	if version != 1 and word[:6] == 'trough':
104	1	word = 'trou2f' + word[6:]
105	1	if word[:2] == 'gn':
106	1	word = '2n' + word[2:]
107	1	if word[-2:] == 'mb':
108	1	word = word[:-1] + '2'
109	1	for src, tar in (
110		('cq', '2q'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
111		('ci', 'si'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
112		('ce', 'se'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
113		('cy', 'sy'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
114		('tch', '2ch'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
115		('c', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
116		('q', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
117		('x', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
118		('v', 'f'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
119		('dg', '2g'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
120		('tio', 'sio'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
121		('tia', 'sia'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122		('d', 't'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123		('ph', 'fh'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124		('b', 'p'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
125		('sh', 's2'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
126		('z', 's'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
127		):
128	1	word = word.replace(src, tar)
129	1	if word[0] in self._lc_v_set:
130	1	word = 'A' + word[1:]
131	1	for vowel in 'aeiou':
132	1	word = word.replace(vowel, '3')
133	1	if version != 1:
134	1	word = word.replace('j', 'y')
135	1	if word[:2] == 'y3':
136	1	word = 'Y3' + word[2:]
137	1	if word[:1] == 'y':
138	1	word = 'A' + word[1:]
139	1	word = word.replace('y', '3')
140	1	for src, tar in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
141	1	word = word.replace(src, tar)
142
143	1	for char in 'stpkfmn':
144	1	word = _squeeze_replace(word, char)
145
146	1	word = word.replace('w3', 'W3')
147	1	if version == 1:
148	1	word = word.replace('wy', 'Wy')
149	1	word = word.replace('wh3', 'Wh3')
150	1	if version == 1:
151	1	word = word.replace('why', 'Why')
152	1	if version != 1 and word[-1:] == 'w':
153	1	word = word[:-1] + '3'
154	1	word = word.replace('w', '2')
155	1	if word[:1] == 'h':
156	1	word = 'A' + word[1:]
157	1	word = word.replace('h', '2')
158	1	word = word.replace('r3', 'R3')
159	1	if version == 1:
160	1	word = word.replace('ry', 'Ry')
161	1	if version != 1 and word[-1:] == 'r':
162	1	word = word[:-1] + '3'
163	1	word = word.replace('r', '2')
164	1	word = word.replace('l3', 'L3')
165	1	if version == 1:
166	1	word = word.replace('ly', 'Ly')
167	1	if version != 1 and word[-1:] == 'l':
168	1	word = word[:-1] + '3'
169	1	word = word.replace('l', '2')
170	1	if version == 1:
171	1	word = word.replace('j', 'y')
172	1	word = word.replace('y3', 'Y3')
173	1	word = word.replace('y', '2')
174	1	word = word.replace('2', '')
175	1	if version != 1 and word[-1:] == '3':
176	1	word = word[:-1] + 'A'
177	1	word = word.replace('3', '')
178
179		# pad with 1s, then extract the necessary length of code
180	1	word += '1' * 10
181	1	if version != 1:
182	1	word = word[:10]
183		else:
184	1	word = word[:6]
185
186	1	return word
187
188
189	1	def caverphone(word, version=2):
190		"""Return the Caverphone code for a word.
191
192		This is a wrapper for :py:meth:`Caverphone.encode`.
193
194		Args:
195		word (str): The word to transform
196		version (int): The version of Caverphone to employ for encoding
197		(defaults to 2)
198
199		Returns:
200		str: The Caverphone value
201
202		Examples:
203		>>> caverphone('Christopher')
204		'KRSTFA1111'
205		>>> caverphone('Niall')
206		'NA11111111'
207		>>> caverphone('Smith')
208		'SMT1111111'
209		>>> caverphone('Schmidt')
210		'SKMT111111'
211
212		>>> caverphone('Christopher', 1)
213		'KRSTF1'
214		>>> caverphone('Niall', 1)
215		'N11111'
216		>>> caverphone('Smith', 1)
217		'SMT111'
218		>>> caverphone('Schmidt', 1)
219		'SKMT11'
220
221		"""
222	1	return Caverphone().encode(word, version)
223
224
225		if __name__ == '__main__':
226		import doctest
227
228		doctest.testmod()
229

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._caverphone.Caverphone.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like