abydos.phonetic._caverphone - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#135)

by Chris

created 2018-11-04 07:51 UTC

abydos.phonetic._caverphone A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	212
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
wmc	37
eloc	106
dl	0
loc	212
ccs	85
cts	85
cp	1
rs	9.44
c	0
b	0
f	0

1 Method

Rating	Name	Duplication	Size	Complexity
F	Caverphone.encode()	0	133	36

1 Function

Rating	Name	Duplication	Size	Complexity
A	caverphone()	0	30	1

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._caverphone.

The phonetic._caverphone module implements the Caverphone phonetic algorithm.
"""

from __future__ import unicode_literals

from ._phonetic import Phonetic

__all__ = ['Caverphone', 'caverphone']


class Caverphone(Phonetic):
    """Caverphone.

    A description of version 1 of the algorithm can be found in
    :cite:`Hood:2002`.

    A description of version 2 of the algorithm can be found in
    :cite:`Hood:2004`.

    The encoder rewrites a lower-cased copy of the word in place. Two digits
    act as temporary placeholders: ``3`` stands in for a vowel and ``2`` for
    a letter that is to be dropped; both are deleted before the code is
    padded. Letters that belong to the final code are written in upper case,
    which shields them from the later lower-case replacements.
    """

    # (prefix, replacement, version-2-only) rules applied to the word start.
    _prefix_rules = (
        ('cough', 'cou2f', False),
        ('rough', 'rou2f', False),
        ('tough', 'tou2f', False),
        ('enough', 'enou2f', False),
        ('trough', 'trou2f', True),
        ('gn', '2n', False),
    )

    # Ordered substring replacements shared by both versions. Order matters:
    # e.g. 'ci' -> 'si' must run before the catch-all 'c' -> 'k'.
    _consonant_rules = (
        ('cq', '2q'),
        ('ci', 'si'),
        ('ce', 'se'),
        ('cy', 'sy'),
        ('tch', '2ch'),
        ('c', 'k'),
        ('q', 'k'),
        ('x', 'k'),
        ('v', 'f'),
        ('dg', '2g'),
        ('tio', 'sio'),
        ('tia', 'sia'),
        ('d', 't'),
        ('ph', 'fh'),
        ('b', 'p'),
        ('sh', 's2'),
        ('z', 's'),
    )

    # Replacements for 'g'/'gh', applied once vowels have become '3'.
    _gh_rules = (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k'))

    @staticmethod
    def _squeeze_replace(word, char):
        """Collapse every run of char to a single upper-cased char."""
        while char * 2 in word:
            word = word.replace(char * 2, char)
        return word.replace(char, char.upper())

    @staticmethod
    def _keep_before_vowel(word, char, is_v1, with_h=False):
        """Upper-case char where it precedes a vowel, else mark it as '2'.

        :param str word: the partially encoded word
        :param str char: the lower-case letter to process
        :param bool is_v1: True to apply the version 1 rules
        :param bool with_h: also keep char when an 'h' sits between it and
            the vowel
        :returns: the word with no lower-case char left in it
        :rtype: str
        """
        upper = char.upper()
        word = word.replace(char + '3', upper + '3')
        if is_v1:
            # version 1 has not rewritten 'y' yet, so it counts as a vowel
            word = word.replace(char + 'y', upper + 'y')
        if with_h:
            word = word.replace(char + 'h3', upper + 'h3')
            if is_v1:
                word = word.replace(char + 'hy', upper + 'hy')
        if not is_v1 and word[-1:] == char:
            # version 2 turns a trailing char into a vowel placeholder
            word = word[:-1] + '3'
        return word.replace(char, '2')

    def _rewrite_edges(self, word, is_v1):
        """Apply the rules anchored to the start and the end of the word."""
        for prefix, replacement, v2_only in self._prefix_rules:
            if v2_only and is_v1:
                continue
            if word.startswith(prefix):
                word = replacement + word[len(prefix):]
        if word[-2:] == 'mb':
            word = word[:-1] + '2'
        return word

    def _mark_vowels(self, word, is_v1):
        """Replace vowels by 'A' (initial) or '3'; fold j/y in version 2.

        The word must be non-empty.
        """
        if word[0] in self._lc_v_set:
            word = 'A' + word[1:]
        for vowel in 'aeiou':
            word = word.replace(vowel, '3')
        if not is_v1:
            word = word.replace('j', 'y')
            if word[:2] == 'y3':
                word = 'Y3' + word[2:]
            if word[:1] == 'y':
                word = 'A' + word[1:]
            word = word.replace('y', '3')
        return word

    def encode(self, word, version=2):
        """Return the Caverphone code for a word.

        The word is lower-cased and every character not found in
        ``self._lc_set`` is discarded before encoding. Any ``version`` other
        than 1 selects the version 2 rules. The result is padded with ``1``
        and is always 6 characters long for version 1 and 10 characters long
        otherwise.

        :param str word: the word to transform
        :param int version: the version of Caverphone to employ for encoding
            (defaults to 2)
        :returns: the Caverphone value
        :rtype: str

        >>> pe = Caverphone()
        >>> pe.encode('Christopher')
        'KRSTFA1111'
        >>> pe.encode('Niall')
        'NA11111111'
        >>> pe.encode('Smith')
        'SMT1111111'
        >>> pe.encode('Schmidt')
        'SKMT111111'

        >>> pe.encode('Christopher', 1)
        'KRSTF1'
        >>> pe.encode('Niall', 1)
        'N11111'
        >>> pe.encode('Smith', 1)
        'SMT111'
        >>> pe.encode('Schmidt', 1)
        'SKMT11'
        """
        is_v1 = version == 1
        word = ''.join(c for c in word.lower() if c in self._lc_set)

        # version 2 drops a final 'e' before anything else happens
        if not is_v1 and word[-1:] == 'e':
            word = word[:-1]

        # Every replacement below keeps the length of the word until the
        # placeholders are deleted, so a non-empty word stays non-empty.
        if word:
            word = self._rewrite_edges(word, is_v1)
            for src, tar in self._consonant_rules:
                word = word.replace(src, tar)
            word = self._mark_vowels(word, is_v1)
            for src, tar in self._gh_rules:
                word = word.replace(src, tar)
            for char in 'stpkfmn':
                word = self._squeeze_replace(word, char)

            word = self._keep_before_vowel(word, 'w', is_v1, with_h=True)
            # 'h' is handled after 'w' (so 'wh' is seen intact) and before
            # 'r'/'l'
            if word[:1] == 'h':
                word = 'A' + word[1:]
            word = word.replace('h', '2')
            word = self._keep_before_vowel(word, 'r', is_v1)
            word = self._keep_before_vowel(word, 'l', is_v1)

            if is_v1:
                # version 1 resolves j/y only now, after w/r/l have used 'y'
                word = word.replace('j', 'y')
                word = word.replace('y3', 'Y3')
                word = word.replace('y', '2')

            # strip the placeholders; version 2 keeps a final vowel as 'A'
            word = word.replace('2', '')
            if not is_v1 and word[-1:] == '3':
                word = word[:-1] + 'A'
            word = word.replace('3', '')

        # pad with 1s, then extract the necessary length of code
        word += '1' * 10
        return word[:6] if is_v1 else word[:10]


def caverphone(word, version=2):
    """Return the Caverphone code for a word.

    Convenience function: builds a fresh :py:class:`Caverphone` object and
    delegates to :py:meth:`Caverphone.encode`.

    :param str word: the word to transform
    :param int version: the version of Caverphone to employ for encoding
        (defaults to 2)
    :returns: the Caverphone value
    :rtype: str

    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'
    """
    encoder = Caverphone()
    return encoder.encode(word, version)


if __name__ == '__main__':
    # Run as a script: execute the doctests embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._caverphone.
20
21		The phonetic._caverphone module implements the Caverphone phonetic algorithm.
22		"""
23
24	1	from __future__ import unicode_literals
25
26	1	from ._phonetic import Phonetic
27
28	1	__all__ = ['Caverphone', 'caverphone']
29
30
31	1	class Caverphone(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
32		"""Caverphone.
33
34		A description of version 1 of the algorithm can be found in
35		:cite:`Hood:2002`.
36
37		A description of version 2 of the algorithm can be found in
38		:cite:`Hood:2004`.
39		"""
40
41	1	def encode(self, word, version=2):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
42		"""Return the Caverphone code for a word.
43
44		:param str word: the word to transform
45		:param int version: the version of Caverphone to employ for encoding
46		(defaults to 2)
47		:returns: the Caverphone value
48		:rtype: str
49
50		>>> pe = Caverphone()
51		>>> pe.encode('Christopher')
52		'KRSTFA1111'
53		>>> pe.encode('Niall')
54		'NA11111111'
55		>>> pe.encode('Smith')
56		'SMT1111111'
57		>>> pe.encode('Schmidt')
58		'SKMT111111'
59
60		>>> pe.encode('Christopher', 1)
61		'KRSTF1'
62		>>> pe.encode('Niall', 1)
63		'N11111'
64		>>> pe.encode('Smith', 1)
65		'SMT111'
66		>>> pe.encode('Schmidt', 1)
67		'SKMT11'
68		"""
69	1	word = word.lower()
70	1	word = ''.join(c for c in word if c in self._lc_set)
71
72	1	def _squeeze_replace(word, char):
73		"""Convert strings of char in word to one instance of new_char."""
74	1	while char * 2 in word:
75	1	word = word.replace(char * 2, char)
76	1	return word.replace(char, char.upper())
77
78		# the main replacement algorithm
79	1	if version != 1 and word[-1:] == 'e':
80	1	word = word[:-1]
81	1	if word:
82	1	if word[:5] == 'cough':
83	1	word = 'cou2f' + word[5:]
84	1	if word[:5] == 'rough':
85	1	word = 'rou2f' + word[5:]
86	1	if word[:5] == 'tough':
87	1	word = 'tou2f' + word[5:]
88	1	if word[:6] == 'enough':
89	1	word = 'enou2f' + word[6:]
90	1	if version != 1 and word[:6] == 'trough':
91	1	word = 'trou2f' + word[6:]
92	1	if word[:2] == 'gn':
93	1	word = '2n' + word[2:]
94	1	if word[-2:] == 'mb':
95	1	word = word[:-1] + '2'
96	1	for src, tar in (
97		('cq', '2q'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
98		('ci', 'si'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
99		('ce', 'se'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
100		('cy', 'sy'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
101		('tch', '2ch'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
102		('c', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
103		('q', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
104		('x', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
105		('v', 'f'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
106		('dg', '2g'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
107		('tio', 'sio'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
108		('tia', 'sia'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
109		('d', 't'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
110		('ph', 'fh'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
111		('b', 'p'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
112		('sh', 's2'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
113		('z', 's'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
114		):
115	1	word = word.replace(src, tar)
116	1	if word[0] in self._lc_v_set:
117	1	word = 'A' + word[1:]
118	1	for vowel in 'aeiou':
119	1	word = word.replace(vowel, '3')
120	1	if version != 1:
121	1	word = word.replace('j', 'y')
122	1	if word[:2] == 'y3':
123	1	word = 'Y3' + word[2:]
124	1	if word[:1] == 'y':
125	1	word = 'A' + word[1:]
126	1	word = word.replace('y', '3')
127	1	for src, tar in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
128	1	word = word.replace(src, tar)
129
130	1	for char in 'stpkfmn':
131	1	word = _squeeze_replace(word, char)
132
133	1	word = word.replace('w3', 'W3')
134	1	if version == 1:
135	1	word = word.replace('wy', 'Wy')
136	1	word = word.replace('wh3', 'Wh3')
137	1	if version == 1:
138	1	word = word.replace('why', 'Why')
139	1	if version != 1 and word[-1:] == 'w':
140	1	word = word[:-1] + '3'
141	1	word = word.replace('w', '2')
142	1	if word[:1] == 'h':
143	1	word = 'A' + word[1:]
144	1	word = word.replace('h', '2')
145	1	word = word.replace('r3', 'R3')
146	1	if version == 1:
147	1	word = word.replace('ry', 'Ry')
148	1	if version != 1 and word[-1:] == 'r':
149	1	word = word[:-1] + '3'
150	1	word = word.replace('r', '2')
151	1	word = word.replace('l3', 'L3')
152	1	if version == 1:
153	1	word = word.replace('ly', 'Ly')
154	1	if version != 1 and word[-1:] == 'l':
155	1	word = word[:-1] + '3'
156	1	word = word.replace('l', '2')
157	1	if version == 1:
158	1	word = word.replace('j', 'y')
159	1	word = word.replace('y3', 'Y3')
160	1	word = word.replace('y', '2')
161	1	word = word.replace('2', '')
162	1	if version != 1 and word[-1:] == '3':
163	1	word = word[:-1] + 'A'
164	1	word = word.replace('3', '')
165
166		# pad with 1s, then extract the necessary length of code
167	1	word += '1' * 10
168	1	if version != 1:
169	1	word = word[:10]
170		else:
171	1	word = word[:6]
172
173	1	return word
174
175
176	1	def caverphone(word, version=2):
177		"""Return the Caverphone code for a word.
178
179		This is a wrapper for :py:meth:`Caverphone.encode`.
180
181		:param str word: the word to transform
182		:param int version: the version of Caverphone to employ for encoding
183		(defaults to 2)
184		:returns: the Caverphone value
185		:rtype: str
186
187		>>> caverphone('Christopher')
188		'KRSTFA1111'
189		>>> caverphone('Niall')
190		'NA11111111'
191		>>> caverphone('Smith')
192		'SMT1111111'
193		>>> caverphone('Schmidt')
194		'SKMT111111'
195
196		>>> caverphone('Christopher', 1)
197		'KRSTF1'
198		>>> caverphone('Niall', 1)
199		'N11111'
200		>>> caverphone('Smith', 1)
201		'SMT111'
202		>>> caverphone('Schmidt', 1)
203		'SKMT11'
204		"""
205	1	return Caverphone().encode(word, version)
206
207
208		if __name__ == '__main__':
209		import doctest
210
211		doctest.testmod()
212

chrislit / abydos

Pull Request — master (#135)

abydos.phonetic._caverphone A

Complexity

Size/Duplication

Test Coverage

Importance

1 Method

1 Function

Duplication Side-by-Side

Filter issues like