abydos.phonetic._Metaphone.Metaphone.encode() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.phonetic._Metaphone.Metaphone.encode() F

↳ Parent: abydos.phonetic._Metaphone

Complexity

Conditions

Size

Total Lines	200
Code Lines	128

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	99
CRAP Score	80

Importance

Changes

Metric	Value
cc	80
eloc	128
nop	3
dl	0
loc	200
ccs	99
cts	99
cp	1
crap	80
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-


# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._Metaphone.

Metaphone
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from six.moves import range

from ._Phonetic import Phonetic

__all__ = ['Metaphone', 'metaphone']


class Metaphone(Phonetic):

    """Metaphone.

    Based on Lawrence Philips' Pick BASIC code from 1990 :cite:`Philips:1990`,
    as described in :cite:`Philips:1990b`.
    This incorporates some corrections to the above code, particularly
    some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`.
    """

    # Front vowels: in encode(), a C followed by one of these becomes 'S'
    # and a G followed by one of these becomes 'J'
    _frontv = {'E', 'I', 'Y'}
    # As in variable sound--those modified by adding an "h"
    _varson = {'C', 'G', 'P', 'S', 'T'}

    def encode(self, word, max_length=-1):

        """Return the Metaphone code for a word.

        Based on Lawrence Philips' Pick BASIC code from 1990
        :cite:`Philips:1990`, as described in :cite:`Philips:1990b`.
        This incorporates some corrections to the above code, particularly
        some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`.

        Args:
            word (str): The word to transform
            max_length (int): The maximum length of the returned Metaphone
                code (defaults to 64, but in Philips' original implementation
                this was 4). The default of -1 selects 64; any other value
                below 4 is raised to 4.

        Returns:
            str: The Metaphone value, never longer than the effective
            max_length. An empty string is returned when the word contains
            no alphanumeric characters.

        Examples:
            >>> pe = Metaphone()
            >>> pe.encode('Christopher')
            'KRSTFR'
            >>> pe.encode('Niall')
            'NL'
            >>> pe.encode('Smith')
            'SM0'
            >>> pe.encode('Schmidt')
            'SKMTT'

        """
        # Require a max_length of at least 4
        if max_length != -1:
            max_length = max(4, max_length)
        else:
            max_length = 64

        # Delete non-alphanumeric characters and make all caps
        ename = ''.join(c for c in word.upper() if c.isalnum())
        ename = ename.replace('ß', 'SS')

        if not ename:
            return ''

        # Simplify word-initial letter combinations
        if ename[0:2] in {'PN', 'AE', 'KN', 'GN', 'WR'}:
            ename = ename[1:]
        elif ename[0] == 'X':
            ename = 'S' + ename[1:]
        elif ename[0:2] == 'WH':
            ename = 'W' + ename[2:]

        # Convert to metaphone
        elen = len(ename) - 1  # index of the last character
        metaph = ''
        for i, char in enumerate(ename):
            if len(metaph) >= max_length:
                break

            # Skip a repeated letter; G and T deal with doubling in their
            # own branches below
            if char not in {'G', 'T'} and i > 0 and ename[i - 1] == char:
                continue

            # NOTE(review): _uc_v_set is inherited from Phonetic; presumably
            # the set of uppercase vowels -- confirm against _Phonetic.
            if char in self._uc_v_set and i == 0:
                metaph = char

            elif char == 'B':
                # Drop B only when it is the last letter and follows M
                if i != elen or ename[i - 1] != 'M':
                    metaph += char

            elif char == 'C':
                # Emit nothing for C in S + C + front vowel
                if not (
                    i > 0
                    and ename[i - 1] == 'S'
                    and ename[i + 1 : i + 2] in self._frontv
                ):
                    if ename[i + 1 : i + 3] == 'IA':
                        metaph += 'X'
                    elif ename[i + 1 : i + 2] in self._frontv:
                        metaph += 'S'
                    elif i > 0 and ename[i - 1 : i + 2] == 'SCH':
                        metaph += 'K'
                    elif ename[i + 1 : i + 2] == 'H':
                        # Word-initial CH followed by a non-vowel is 'K'
                        if (
                            i == 0
                            and i + 1 < elen
                            and ename[i + 2 : i + 3] not in self._uc_v_set
                        ):
                            metaph += 'K'
                        else:
                            metaph += 'X'
                    else:
                        metaph += 'K'

            elif char == 'D':
                # D + G + front vowel is 'J'; the G is then skipped in
                # most positions by the G branch
                if (
                    ename[i + 1 : i + 2] == 'G'
                    and ename[i + 2 : i + 3] in self._frontv
                ):
                    metaph += 'J'
                else:
                    metaph += 'T'

            elif char == 'G':
                # GH followed by a vowel (H not being the last letter)
                if ename[i + 1 : i + 2] == 'H' and not (
                    i + 1 == elen or ename[i + 2 : i + 3] not in self._uc_v_set
                ):
                    continue
                # Non-initial G in a word-final GN or GNED
                elif i > 0 and (
                    (i + 1 == elen and ename[i + 1] == 'N')
                    or (i + 3 == elen and ename[i + 1 : i + 4] == 'NED')
                ):
                    continue
                # D + G + front vowel (with the D past index 0): the D
                # branch already emitted 'J'
                elif (
                    i - 1 > 0
                    and i + 1 <= elen
                    and ename[i - 1] == 'D'
                    and ename[i + 1] in self._frontv
                ):
                    continue
                # First G of a doubled G; the second one is encoded
                elif ename[i + 1 : i + 2] == 'G':
                    continue
                elif ename[i + 1 : i + 2] in self._frontv:
                    if i == 0 or ename[i - 1] != 'G':
                        metaph += 'J'
                    else:
                        metaph += 'K'
                else:
                    metaph += 'K'

            elif char == 'H':
                # Skip H after a vowel when no vowel follows
                if (
                    i > 0
                    and ename[i - 1] in self._uc_v_set
                    and ename[i + 1 : i + 2] not in self._uc_v_set
                ):
                    continue
                # Skip H after C, G, P, S or T: those branches look ahead
                # for the H themselves
                elif i > 0 and ename[i - 1] in self._varson:
                    continue
                else:
                    metaph += 'H'

            elif char in {'F', 'J', 'L', 'M', 'N', 'R'}:
                metaph += char

            elif char == 'K':
                # K after C was already covered by the C branch
                if i > 0 and ename[i - 1] == 'C':
                    continue
                else:
                    metaph += 'K'

            elif char == 'P':
                if ename[i + 1 : i + 2] == 'H':
                    metaph += 'F'
                else:
                    metaph += 'P'

            elif char == 'Q':
                metaph += 'K'

            elif char == 'S':
                # Non-initial SIO / SIA
                if (
                    i > 0
                    and i + 2 <= elen
                    and ename[i + 1] == 'I'
                    and ename[i + 2] in 'OA'
                ):
                    metaph += 'X'
                elif ename[i + 1 : i + 2] == 'H':
                    metaph += 'X'
                else:
                    metaph += 'S'

            elif char == 'T':
                # Non-initial TIA / TIO
                if (
                    i > 0
                    and i + 2 <= elen
                    and ename[i + 1] == 'I'
                    and ename[i + 2] in {'A', 'O'}
                ):
                    metaph += 'X'
                elif ename[i + 1 : i + 2] == 'H':
                    # '0' (zero) is the code used for TH
                    metaph += '0'
                elif ename[i + 1 : i + 3] != 'CH':
                    # Emit nothing for T before CH, nor for the second T
                    # of a doubled T
                    if ename[i - 1 : i] != 'T':
                        metaph += 'T'

            elif char == 'V':
                metaph += 'F'

            elif char in 'WY':
                # W and Y are kept only when a vowel follows
                if ename[i + 1 : i + 2] in self._uc_v_set:
                    metaph += char

            elif char == 'X':
                metaph += 'KS'

            elif char == 'Z':
                metaph += 'S'

        # The X branch appends two characters at once, so the code can
        # overshoot max_length by one; trim it to honour the documented limit.
        return metaph[:max_length]


def metaphone(word, max_length=-1):
    """Return the Metaphone code for a word.

    Convenience function: it creates a :py:class:`Metaphone` instance and
    passes both arguments on to :py:meth:`Metaphone.encode`.

    Args:
        word (str): The word to transform
        max_length (int): The maximum length of the returned Metaphone
            code (defaults to 64, but in Philips' original implementation
            this was 4)

    Returns:
        str: The Metaphone value

    Examples:
        >>> metaphone('Christopher')
        'KRSTFR'
        >>> metaphone('Niall')
        'NL'
        >>> metaphone('Smith')
        'SM0'
        >>> metaphone('Schmidt')
        'SKMTT'

    """
    encoder = Metaphone()
    return encoder.encode(word, max_length)


if __name__ == '__main__':
    # When executed as a script, run the doctests in this module's docstrings.
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
		0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_Metaphone` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)\|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._Metaphone.
20
21		Metaphone
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from six.moves import range
32
33	1	from ._Phonetic import Phonetic
34
35	1	__all__ = ['Metaphone', 'metaphone']
36
37
38	1	class Metaphone(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
39		"""Metaphone.
40
41		Based on Lawrence Philips' Pick BASIC code from 1990 :cite:`Philips:1990`,
42		as described in :cite:`Philips:1990b`.
43		This incorporates some corrections to the above code, particularly
44		some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`.
45		"""
46
47	1	_frontv = {'E', 'I', 'Y'}
48	1	_varson = {'C', 'G', 'P', 'S', 'T'}
49
50	1	def encode(self, word, max_length=-1):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
51		"""Return the Metaphone code for a word.
52
53		Based on Lawrence Philips' Pick BASIC code from 1990
54		:cite:`Philips:1990`, as described in :cite:`Philips:1990b`.
55		This incorporates some corrections to the above code, particularly
56		some of those suggested by Michael Kuhn in :cite:`Kuhn:1995`.
57
58		Args:
59		word (str): The word to transform
60		max_length (int): The maximum length of the returned Metaphone
61		code (defaults to 64, but in Philips' original implementation
62		this was 4)
63
64		Returns:
65		str: The Metaphone value
66
67		Examples:
68		>>> pe = Metaphone()
69		>>> pe.encode('Christopher')
70		'KRSTFR'
71		>>> pe.encode('Niall')
72		'NL'
73		>>> pe.encode('Smith')
74		'SM0'
75		>>> pe.encode('Schmidt')
76		'SKMTT'
77
78		"""
79		# Require a max_length of at least 4
80	1	if max_length != -1:
81	1	max_length = max(4, max_length)
82		else:
83	1	max_length = 64
84
85		# As in variable sound--those modified by adding an "h"
86	1	ename = ''.join(c for c in word.upper() if c.isalnum())
87	1	ename = ename.replace('ß', 'SS')
88
89		# Delete non-alphanumeric characters and make all caps
90	1	if not ename:
91	1	return ''
92	1	if ename[0:2] in {'PN', 'AE', 'KN', 'GN', 'WR'}:
93	1	ename = ename[1:]
94	1	elif ename[0] == 'X':
95	1	ename = 'S' + ename[1:]
96	1	elif ename[0:2] == 'WH':
97	1	ename = 'W' + ename[2:]
98
99		# Convert to metaphone
100	1	elen = len(ename) - 1
101	1	metaph = ''
102	1	for i in range(len(ename)):
		0 ignored issues – show unused-code introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report Consider using enumerate instead of iterating with range and len Loading history...
103	1	if len(metaph) >= max_length:
104	1	break
105	1	if (
106		ename[i] not in {'G', 'T'}
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
107		and i > 0
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
108		and ename[i - 1] == ename[i]
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
109		):
110	1	continue
111
112	1	if ename[i] in self._uc_v_set and i == 0:
113	1	metaph = ename[i]
114
115	1	elif ename[i] == 'B':
116	1	if i != elen or ename[i - 1] != 'M':
117	1	metaph += ename[i]
118
119	1	elif ename[i] == 'C':
120	1	if not (
121		i > 0
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122		and ename[i - 1] == 'S'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123		and ename[i + 1 : i + 2] in self._frontv
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124		):
125	1	if ename[i + 1 : i + 3] == 'IA':
126	1	metaph += 'X'
127	1	elif ename[i + 1 : i + 2] in self._frontv:
128	1	metaph += 'S'
129	1	elif i > 0 and ename[i - 1 : i + 2] == 'SCH':
130	1	metaph += 'K'
131	1	elif ename[i + 1 : i + 2] == 'H':
132	1	if (
133		i == 0
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
134		and i + 1 < elen
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
135		and ename[i + 2 : i + 3] not in self._uc_v_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
136		):
137	1	metaph += 'K'
138		else:
139	1	metaph += 'X'
140		else:
141	1	metaph += 'K'
142
143	1	elif ename[i] == 'D':
144	1	if (
145		ename[i + 1 : i + 2] == 'G'
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
146		and ename[i + 2 : i + 3] in self._frontv
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
147		):
148	1	metaph += 'J'
149		else:
150	1	metaph += 'T'
151
152	1	elif ename[i] == 'G':
153	1	if ename[i + 1 : i + 2] == 'H' and not (
154		i + 1 == elen or ename[i + 2 : i + 3] not in self._uc_v_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
155		):
156	1	continue
157	1	elif i > 0 and (
158		(i + 1 == elen and ename[i + 1] == 'N')
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
159		or (i + 3 == elen and ename[i + 1 : i + 4] == 'NED')
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
160		):
161	1	continue
162	1	elif (
163		i - 1 > 0
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
164		and i + 1 <= elen
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
165		and ename[i - 1] == 'D'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
166		and ename[i + 1] in self._frontv
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
167		):
168	1	continue
169	1	elif ename[i + 1 : i + 2] == 'G':
170	1	continue
171	1	elif ename[i + 1 : i + 2] in self._frontv:
172	1	if i == 0 or ename[i - 1] != 'G':
173	1	metaph += 'J'
174		else:
175	1	metaph += 'K'
176		else:
177	1	metaph += 'K'
178
179	1	elif ename[i] == 'H':
180	1	if (
181		i > 0
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
182		and ename[i - 1] in self._uc_v_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
183		and ename[i + 1 : i + 2] not in self._uc_v_set
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
184		):
185	1	continue
186	1	elif i > 0 and ename[i - 1] in self._varson:
187	1	continue
188		else:
189	1	metaph += 'H'
190
191	1	elif ename[i] in {'F', 'J', 'L', 'M', 'N', 'R'}:
192	1	metaph += ename[i]
193
194	1	elif ename[i] == 'K':
195	1	if i > 0 and ename[i - 1] == 'C':
196	1	continue
197		else:
198	1	metaph += 'K'
199
200	1	elif ename[i] == 'P':
201	1	if ename[i + 1 : i + 2] == 'H':
202	1	metaph += 'F'
203		else:
204	1	metaph += 'P'
205
206	1	elif ename[i] == 'Q':
207	1	metaph += 'K'
208
209	1	elif ename[i] == 'S':
210	1	if (
211		i > 0
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
212		and i + 2 <= elen
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
213		and ename[i + 1] == 'I'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
214		and ename[i + 2] in 'OA'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
215		):
216	1	metaph += 'X'
217	1	elif ename[i + 1 : i + 2] == 'H':
218	1	metaph += 'X'
219		else:
220	1	metaph += 'S'
221
222	1	elif ename[i] == 'T':
223	1	if (
224		i > 0
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
225		and i + 2 <= elen
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
226		and ename[i + 1] == 'I'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
227		and ename[i + 2] in {'A', 'O'}
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
228		):
229	1	metaph += 'X'
230	1	elif ename[i + 1 : i + 2] == 'H':
231	1	metaph += '0'
232	1	elif ename[i + 1 : i + 3] != 'CH':
233	1	if ename[i - 1 : i] != 'T':
234	1	metaph += 'T'
235
236	1	elif ename[i] == 'V':
237	1	metaph += 'F'
238
239	1	elif ename[i] in 'WY':
240	1	if ename[i + 1 : i + 2] in self._uc_v_set:
241	1	metaph += ename[i]
242
243	1	elif ename[i] == 'X':
244	1	metaph += 'KS'
245
246	1	elif ename[i] == 'Z':
247	1	metaph += 'S'
248
249	1	return metaph
250
251
252	1	def metaphone(word, max_length=-1):
253		"""Return the Metaphone code for a word.
254
255		This is a wrapper for :py:meth:`Metaphone.encode`.
256
257		Args:
258		word (str): The word to transform
259		max_length (int): The maximum length of the returned Metaphone
260		code (defaults to 64, but in Philips' original implementation
261		this was 4)
262
263		Returns:
264		str: The Metaphone value
265
266		Examples:
267		>>> metaphone('Christopher')
268		'KRSTFR'
269		>>> metaphone('Niall')
270		'NL'
271		>>> metaphone('Smith')
272		'SM0'
273		>>> metaphone('Schmidt')
274		'SKMTT'
275
276		"""
277	1	return Metaphone().encode(word, max_length)
278
279
280		if __name__ == '__main__':
281		import doctest
282
283		doctest.testmod()
284

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._Metaphone.Metaphone.encode() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like