abydos.phonetic._caverphone.caverphone() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.phonetic._caverphone.caverphone() A

↳ Parent: abydos.phonetic._caverphone

Complexity

Conditions

Size

Total Lines	39
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	2
dl	0
loc	39
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._caverphone.

Caverphone phonetic algorithm
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._phonetic import _Phonetic

__all__ = ['Caverphone', 'caverphone']


class Caverphone(_Phonetic):
    """Caverphone phonetic encoder.

    Version 1 of the algorithm is described in :cite:`Hood:2002`;
    version 2 is described in :cite:`Hood:2004`.
    """

    def encode(self, word, version=2):
        """Compute the Caverphone code of a word.

        The word is lowercased and reduced to the characters found in
        ``self._lc_set`` before an ordered series of string substitutions
        is applied. The result is padded with ``'1'`` and cut to a fixed
        length: 6 characters when ``version`` equals 1, 10 characters for
        any other value.

        Parameters
        ----------
        word : str
            The word to transform
        version : int
            The version of Caverphone to employ for encoding (defaults to 2)

        Returns
        -------
        str
            The Caverphone value

        Examples
        --------
        >>> pe = Caverphone()
        >>> pe.encode('Christopher')
        'KRSTFA1111'
        >>> pe.encode('Niall')
        'NA11111111'
        >>> pe.encode('Smith')
        'SMT1111111'
        >>> pe.encode('Schmidt')
        'SKMT111111'

        >>> pe.encode('Christopher', 1)
        'KRSTF1'
        >>> pe.encode('Niall', 1)
        'N11111'
        >>> pe.encode('Smith', 1)
        'SMT111'
        >>> pe.encode('Schmidt', 1)
        'SKMT11'

        """
        is_v1 = version == 1
        allowed = self._lc_set
        code = ''.join(ch for ch in word.lower() if ch in allowed)

        # Every rule below that is not shared by both versions is gated on
        # is_v1; the order of the substitutions matters and is kept as is.
        if not is_v1 and code.endswith('e'):
            code = code[:-1]

        if code:
            # Word-initial "-ough" words: the "gh" becomes "2f".
            ough_prefixes = ('cough', 'rough', 'tough', 'enough')
            if not is_v1:
                ough_prefixes += ('trough',)
            for prefix in ough_prefixes:
                if code.startswith(prefix):
                    code = prefix[:-2] + '2f' + code[len(prefix):]

            if code.startswith('gn'):
                code = '2n' + code[2:]
            if code.endswith('mb'):
                code = code[:-1] + '2'

            # Ordered digraph/letter substitutions; later entries rely on
            # the output of earlier ones (e.g. 'ci' before plain 'c').
            substitutions = (
                ('cq', '2q'),
                ('ci', 'si'),
                ('ce', 'se'),
                ('cy', 'sy'),
                ('tch', '2ch'),
                ('c', 'k'),
                ('q', 'k'),
                ('x', 'k'),
                ('v', 'f'),
                ('dg', '2g'),
                ('tio', 'sio'),
                ('tia', 'sia'),
                ('d', 't'),
                ('ph', 'fh'),
                ('b', 'p'),
                ('sh', 's2'),
                ('z', 's'),
            )
            for old, new in substitutions:
                code = code.replace(old, new)

            # An initial vowel becomes 'A'; every remaining vowel becomes
            # the placeholder '3'.
            if code[0] in self._lc_v_set:
                code = 'A' + code[1:]
            code = ''.join('3' if ch in 'aeiou' else ch for ch in code)

            if not is_v1:
                code = code.replace('j', 'y')
                if code.startswith('y3'):
                    code = 'Y3' + code[2:]
                if code.startswith('y'):
                    code = 'A' + code[1:]
                code = code.replace('y', '3')

            code = (
                code.replace('3gh3', '3kh3')
                .replace('gh', '22')
                .replace('g', 'k')
            )

            # Collapse runs of each of these consonants to one occurrence,
            # then uppercase that consonant.
            for consonant in 'stpkfmn':
                doubled = consonant + consonant
                while doubled in code:
                    code = code.replace(doubled, consonant)
                code = code.replace(consonant, consonant.upper())

            # w
            code = code.replace('w3', 'W3')
            if is_v1:
                code = code.replace('wy', 'Wy')
            code = code.replace('wh3', 'Wh3')
            if is_v1:
                code = code.replace('why', 'Why')
            elif code.endswith('w'):
                code = code[:-1] + '3'
            code = code.replace('w', '2')

            # h
            if code.startswith('h'):
                code = 'A' + code[1:]
            code = code.replace('h', '2')

            # r, then l: both follow the same pattern.
            for liquid in 'rl':
                upper = liquid.upper()
                code = code.replace(liquid + '3', upper + '3')
                if is_v1:
                    code = code.replace(liquid + 'y', upper + 'y')
                elif code.endswith(liquid):
                    code = code[:-1] + '3'
                code = code.replace(liquid, '2')

            if is_v1:
                code = code.replace('j', 'y')
                code = code.replace('y3', 'Y3')
                code = code.replace('y', '2')

            # Drop the '2' placeholders, then the '3' placeholders (a
            # trailing '3' is kept as 'A' outside version 1).
            code = code.replace('2', '')
            if not is_v1 and code.endswith('3'):
                code = code[:-1] + 'A'
            code = code.replace('3', '')

        # pad with 1s, then extract the necessary length of code
        length = 6 if is_v1 else 10
        return (code + '1' * 10)[:length]


def caverphone(word, version=2):
    """Compute the Caverphone code of a word.

    Convenience wrapper that builds a :py:class:`Caverphone` instance and
    delegates to :py:meth:`Caverphone.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    version : int
        The version of Caverphone to employ for encoding (defaults to 2)

    Returns
    -------
    str
        The Caverphone value

    Examples
    --------
    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'

    """
    encoder = Caverphone()
    return encoder.encode(word, version)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._caverphone.
20
21		Caverphone phonetic algorithm
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._phonetic import _Phonetic
32
33	1	__all__ = ['Caverphone', 'caverphone']
34
35
36	1	class Caverphone(_Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
37		"""Caverphone.
38
39		A description of version 1 of the algorithm can be found in
40		:cite:`Hood:2002`.
41
42		A description of version 2 of the algorithm can be found in
43		:cite:`Hood:2004`.
44		"""
45
46	1	def encode(self, word, version=2):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
47		"""Return the Caverphone code for a word.
48
49		Parameters
50		----------
51		word : str
52		The word to transform
53		version : int
54		The version of Caverphone to employ for encoding (defaults to 2)
55
56		Returns
57		-------
58		str
59		The Caverphone value
60
61		Examples
62		--------
63		>>> pe = Caverphone()
64		>>> pe.encode('Christopher')
65		'KRSTFA1111'
66		>>> pe.encode('Niall')
67		'NA11111111'
68		>>> pe.encode('Smith')
69		'SMT1111111'
70		>>> pe.encode('Schmidt')
71		'SKMT111111'
72
73		>>> pe.encode('Christopher', 1)
74		'KRSTF1'
75		>>> pe.encode('Niall', 1)
76		'N11111'
77		>>> pe.encode('Smith', 1)
78		'SMT111'
79		>>> pe.encode('Schmidt', 1)
80		'SKMT11'
81
82		"""
83	1	word = word.lower()
84	1	word = ''.join(c for c in word if c in self._lc_set)
85
86	1	def _squeeze_replace(word, char):
87		"""Convert strings of char in word to one instance.
88
89		Parameters
90		----------
91		word : str
92		The partially converted word
93		char : str
94		A character to 'squeeze'
95
96		Returns
97		-------
98		str
99		The word with instances of char squeezed down to one
100
101		"""
102	1	while char * 2 in word:
103	1	word = word.replace(char * 2, char)
104	1	return word.replace(char, char.upper())
105
106		# the main replacement algorithm
107	1	if version != 1 and word[-1:] == 'e':
108	1	word = word[:-1]
109	1	if word:
110	1	if word[:5] == 'cough':
111	1	word = 'cou2f' + word[5:]
112	1	if word[:5] == 'rough':
113	1	word = 'rou2f' + word[5:]
114	1	if word[:5] == 'tough':
115	1	word = 'tou2f' + word[5:]
116	1	if word[:6] == 'enough':
117	1	word = 'enou2f' + word[6:]
118	1	if version != 1 and word[:6] == 'trough':
119	1	word = 'trou2f' + word[6:]
120	1	if word[:2] == 'gn':
121	1	word = '2n' + word[2:]
122	1	if word[-2:] == 'mb':
123	1	word = word[:-1] + '2'
124	1	for src, tar in (
125		('cq', '2q'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
126		('ci', 'si'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
127		('ce', 'se'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
128		('cy', 'sy'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
129		('tch', '2ch'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
130		('c', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
131		('q', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
132		('x', 'k'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
133		('v', 'f'),
		0 ignored issues – show Coding Style introduced 2018-11-14 09:24 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
134		('dg', '2g'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
135		('tio', 'sio'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
136		('tia', 'sia'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
137		('d', 't'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
138		('ph', 'fh'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
139		('b', 'p'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
140		('sh', 's2'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
141		('z', 's'),
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
142		):
143	1	word = word.replace(src, tar)
144	1	if word[0] in self._lc_v_set:
145	1	word = 'A' + word[1:]
146	1	for vowel in 'aeiou':
147	1	word = word.replace(vowel, '3')
148	1	if version != 1:
149	1	word = word.replace('j', 'y')
150	1	if word[:2] == 'y3':
151	1	word = 'Y3' + word[2:]
152	1	if word[:1] == 'y':
153	1	word = 'A' + word[1:]
154	1	word = word.replace('y', '3')
155	1	for src, tar in (('3gh3', '3kh3'), ('gh', '22'), ('g', 'k')):
156	1	word = word.replace(src, tar)
157
158	1	for char in 'stpkfmn':
159	1	word = _squeeze_replace(word, char)
160
161	1	word = word.replace('w3', 'W3')
162	1	if version == 1:
163	1	word = word.replace('wy', 'Wy')
164	1	word = word.replace('wh3', 'Wh3')
165	1	if version == 1:
166	1	word = word.replace('why', 'Why')
167	1	if version != 1 and word[-1:] == 'w':
168	1	word = word[:-1] + '3'
169	1	word = word.replace('w', '2')
170	1	if word[:1] == 'h':
171	1	word = 'A' + word[1:]
172	1	word = word.replace('h', '2')
173	1	word = word.replace('r3', 'R3')
174	1	if version == 1:
175	1	word = word.replace('ry', 'Ry')
176	1	if version != 1 and word[-1:] == 'r':
177	1	word = word[:-1] + '3'
178	1	word = word.replace('r', '2')
179	1	word = word.replace('l3', 'L3')
180	1	if version == 1:
181	1	word = word.replace('ly', 'Ly')
182	1	if version != 1 and word[-1:] == 'l':
183	1	word = word[:-1] + '3'
184	1	word = word.replace('l', '2')
185	1	if version == 1:
186	1	word = word.replace('j', 'y')
187	1	word = word.replace('y3', 'Y3')
188	1	word = word.replace('y', '2')
189	1	word = word.replace('2', '')
190	1	if version != 1 and word[-1:] == '3':
191	1	word = word[:-1] + 'A'
192	1	word = word.replace('3', '')
193
194		# pad with 1s, then extract the necessary length of code
195	1	word += '1' * 10
196	1	if version != 1:
197	1	word = word[:10]
198		else:
199	1	word = word[:6]
200
201	1	return word
202
203
204	1	def caverphone(word, version=2):
205		"""Return the Caverphone code for a word.
206
207		This is a wrapper for :py:meth:`Caverphone.encode`.
208
209		Parameters
210		----------
211		word : str
212		The word to transform
213		version : int
214		The version of Caverphone to employ for encoding (defaults to 2)
215
216		Returns
217		-------
218		str
219		The Caverphone value
220
221		Examples
222		--------
223		>>> caverphone('Christopher')
224		'KRSTFA1111'
225		>>> caverphone('Niall')
226		'NA11111111'
227		>>> caverphone('Smith')
228		'SMT1111111'
229		>>> caverphone('Schmidt')
230		'SKMT111111'
231
232		>>> caverphone('Christopher', 1)
233		'KRSTF1'
234		>>> caverphone('Niall', 1)
235		'N11111'
236		>>> caverphone('Smith', 1)
237		'SMT111'
238		>>> caverphone('Schmidt', 1)
239		'SKMT11'
240
241		"""
242	1	return Caverphone().encode(word, version)
243
244
245		if __name__ == '__main__':
246		import doctest
247
248		doctest.testmod()
249

chrislit / abydos

Push — master ( f43547...71985b )

abydos.phonetic._caverphone.caverphone() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like