abydos.phonetic._spanish_metaphone - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.phonetic._spanish_metaphone A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	276
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	117
dl	0
loc	276
ccs	96
cts	96
cp	1
rs	10
c	0
b	0
f	0
wmc	30

1 Function

Rating	Name	Duplication	Size	Complexity
A	spanish_metaphone()	0	35	1

1 Method

Rating	Name	Duplication	Size	Complexity
F	SpanishMetaphone.encode()	0	183	29

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._spanish_metaphone.

Spanish Metaphone
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type

from ._phonetic import _Phonetic

__all__ = ['SpanishMetaphone', 'spanish_metaphone']


class SpanishMetaphone(_Phonetic):
    """Spanish Metaphone phonetic encoder.

    A rewrite of the Spanish Metaphone Algorithm published at
    https://github.com/amsqr/Spanish-Metaphone and discussed in
    :cite:`Mosquera:2012`.

    The optional modified variant follows :cite:`delPilarAngeles:2016`.
    """

    def encode(self, word, max_length=6, modified=False):
        """Return the Spanish Metaphone of a word.

        The word is upper-cased and NFC-normalized, a fixed series of
        substring replacements is applied, and the result is scanned left to
        right, appending one code fragment per recognized letter.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            Encoding stops once the code reaches this length (defaults to 6)
        modified : bool
            Set to True to use del Pilar Angeles & Bailón-Miguel's modified
            version of the algorithm

        Returns
        -------
        str
            The Spanish Metaphone code

        Examples
        --------
        >>> pe = SpanishMetaphone()
        >>> pe.encode('Perez')
        'PRZ'
        >>> pe.encode('Martinez')
        'MRTNZ'
        >>> pe.encode('Gutierrez')
        'GTRRZ'
        >>> pe.encode('Santiago')
        'SNTG'
        >>> pe.encode('Nicolás')
        'NKLS'

        """
        vowels = {'A', 'E', 'I', 'O', 'U'}
        soft_vowels = {'E', 'I'}
        # consonants copied to the code unchanged
        plain_consonants = {
            'D', 'F', 'J', 'K', 'L', 'M', 'N', 'P', 'T', 'V', 'Y',
        }

        word = unicode_normalize('NFC', text_type(word.upper()))

        # extra replacements used only by the modified version
        if modified:
            for old, new in (('MB', 'NB'), ('MP', 'NP'), ('BS', 'S')):
                word = word.replace(old, new)
            if word[:2] == 'PS':
                word = word[1:]

        # simple replacements; applied one after another, so order matters
        for old, new in (
            ('Á', 'A'),
            ('CH', 'X'),
            ('Ç', 'S'),
            ('É', 'E'),
            ('Í', 'I'),
            ('Ó', 'O'),
            ('Ú', 'U'),
            ('Ñ', 'NY'),
            ('GÜ', 'W'),
            ('Ü', 'U'),
            ('B', 'V'),
            ('LL', 'Y'),
        ):
            word = word.replace(old, new)

        length = len(word)
        code = ''
        idx = 0

        while len(code) < max_length and idx < length:
            char = word[idx]
            # one-character look-ahead; '' when char is the last letter
            following = word[idx + 1 : idx + 2]

            if idx == 0 and char in vowels:
                # a vowel is only kept when it starts the word
                code += char
                idx += 1
            elif char in plain_consonants:
                code += char
                # a doubled consonant is consumed as a single one
                idx += 2 if following == char else 1
            elif char == 'C':
                if following == 'C':
                    # 'acción', 'reacción', etc.
                    code += 'X'
                    idx += 2
                elif following in soft_vowels:
                    # 'cesar', 'cien', 'cid', 'conciencia'
                    code += 'Z'
                    idx += 2
                else:
                    code += 'K'
                    idx += 1
            elif char == 'G':
                if following in soft_vowels:
                    # 'gente', 'ecologia', etc.
                    code += 'J'
                    idx += 2
                else:
                    code += 'G'
                    idx += 1
            elif char == 'H':
                # 'H' is silent in Spanish: when a vowel follows, that vowel
                # is written to the code in place of the 'H'
                if following in vowels:
                    code += following
                    idx += 2
                else:
                    code += 'H'
                    idx += 1
            elif char == 'Q':
                code += 'K'
                # 'QU' is consumed as a unit
                idx += 2 if following == 'U' else 1
            elif char == 'W':
                code += 'U'
                idx += 1
            elif char == 'R':
                code += 'R'
                idx += 1
            elif char == 'S':
                # word-initial 'S' not followed by a vowel gets an 'E' prefix
                if idx == 0 and following not in vowels:
                    code += 'ES'
                else:
                    code += 'S'
                idx += 1
            elif char == 'Z':
                code += 'Z'
                idx += 1
            elif char == 'X':
                # word-initial 'X' not followed by a vowel gets an 'E' prefix,
                # but only when the word is longer than one letter
                if length > 1 and idx == 0 and following not in vowels:
                    code += 'EX'
                else:
                    code += 'X'
                idx += 1
            else:
                # anything else (non-initial vowels, other symbols) is skipped
                idx += 1

        # Final change from S to Z in modified version
        if modified:
            code = code.replace('S', 'Z')

        return code


def spanish_metaphone(word, max_length=6, modified=False):
    """Return the Spanish Metaphone of a word.

    Convenience wrapper that builds a :py:class:`SpanishMetaphone` instance
    and delegates to :py:meth:`SpanishMetaphone.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        Passed through to the encoder as its length limit (defaults to 6)
    modified : bool
        Set to True to use del Pilar Angeles & Bailón-Miguel's modified version
        of the algorithm

    Returns
    -------
    str
        The Spanish Metaphone code

    Examples
    --------
    >>> spanish_metaphone('Perez')
    'PRZ'
    >>> spanish_metaphone('Martinez')
    'MRTNZ'
    >>> spanish_metaphone('Gutierrez')
    'GTRRZ'
    >>> spanish_metaphone('Santiago')
    'SNTG'
    >>> spanish_metaphone('Nicolás')
    'NKLS'

    """
    encoder = SpanishMetaphone()
    return encoder.encode(word, max_length=max_length, modified=modified)


if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the docstrings.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._spanish_metaphone.
20
21		Spanish Metaphone
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34
35	1	from ._phonetic import _Phonetic
36
37	1	__all__ = ['SpanishMetaphone', 'spanish_metaphone']
38
39
40	1	class SpanishMetaphone(_Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""Spanish Metaphone.
42
43		This is a quick rewrite of the Spanish Metaphone Algorithm, as presented at
44		https://github.com/amsqr/Spanish-Metaphone and discussed in
45		:cite:`Mosquera:2012`.
46
47		Modified version based on :cite:`delPilarAngeles:2016`.
48		"""
49
50	1	def encode(self, word, max_length=6, modified=False):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
51		"""Return the Spanish Metaphone of a word.
52
53		Parameters
54		----------
55		word : str
56		The word to transform
57		max_length : int
58		The length of the code returned (defaults to 6)
59		modified : bool
60		Set to True to use del Pilar Angeles & Bailón-Miguel's modified
61		version of the algorithm
62
63		Returns
64		-------
65		str
66		The Spanish Metaphone code
67
68		Examples
69		--------
70		>>> pe = SpanishMetaphone()
71		>>> pe.encode('Perez')
72		'PRZ'
73		>>> pe.encode('Martinez')
74		'MRTNZ'
75		>>> pe.encode('Gutierrez')
76		'GTRRZ'
77		>>> pe.encode('Santiago')
78		'SNTG'
79		>>> pe.encode('Nicolás')
80		'NKLS'
81
82		"""
83
84	1	def _is_vowel(pos):
85		"""Return True if the character at word[pos] is a vowel.
86
87		Parameters
88		----------
89		pos : int
90		Position to check for a vowel
91
92		Returns
93		-------
94		bool
95		True if word[pos] is a vowel
96
97		"""
98	1	return pos < len(word) and word[pos] in {'A', 'E', 'I', 'O', 'U'}
99
100	1	word = unicode_normalize('NFC', text_type(word.upper()))
101
102	1	meta_key = ''
103	1	pos = 0
104
105		# do some replacements for the modified version
106	1	if modified:
107	1	word = word.replace('MB', 'NB')
108	1	word = word.replace('MP', 'NP')
109	1	word = word.replace('BS', 'S')
110	1	if word[:2] == 'PS':
111	1	word = word[1:]
112
113		# simple replacements
114	1	word = word.replace('Á', 'A')
115	1	word = word.replace('CH', 'X')
116	1	word = word.replace('Ç', 'S')
117	1	word = word.replace('É', 'E')
118	1	word = word.replace('Í', 'I')
119	1	word = word.replace('Ó', 'O')
120	1	word = word.replace('Ú', 'U')
121	1	word = word.replace('Ñ', 'NY')
122	1	word = word.replace('GÜ', 'W')
123	1	word = word.replace('Ü', 'U')
124	1	word = word.replace('B', 'V')
125	1	word = word.replace('LL', 'Y')
126
127	1	while len(meta_key) < max_length:
128	1	if pos >= len(word):
129	1	break
130
131		# get the next character
132	1	current_char = word[pos]
133
134		# if a vowel in pos 0, add to key
135	1	if _is_vowel(pos) and pos == 0:
136	1	meta_key += current_char
137	1	pos += 1
138		# otherwise, do consonant rules
139		else:
140		# simple consonants (unmutated)
141	1	if current_char in {
142		'D',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
143		'F',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
144		'J',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
145		'K',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
146		'M',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
147		'N',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
148		'P',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
149		'T',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
150		'V',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
151		'L',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
152		'Y',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
153		}:
154	1	meta_key += current_char
155		# skip doubled consonants
156	1	if word[pos + 1 : pos + 2] == current_char:
157	1	pos += 2
158		else:
159	1	pos += 1
160		else:
161	1	if current_char == 'C':
162		# special case 'acción', 'reacción',etc.
163	1	if word[pos + 1 : pos + 2] == 'C':
164	1	meta_key += 'X'
165	1	pos += 2
166		# special case 'cesar', 'cien', 'cid', 'conciencia'
167	1	elif word[pos + 1 : pos + 2] in {'E', 'I'}:
168	1	meta_key += 'Z'
169	1	pos += 2
170		# base case
171		else:
172	1	meta_key += 'K'
173	1	pos += 1
174	1	elif current_char == 'G':
175		# special case 'gente', 'ecologia',etc
176	1	if word[pos + 1 : pos + 2] in {'E', 'I'}:
177	1	meta_key += 'J'
178	1	pos += 2
179		# base case
180		else:
181	1	meta_key += 'G'
182	1	pos += 1
183	1	elif current_char == 'H':
184		# since the letter 'H' is silent in Spanish,
185		# set the meta key to the vowel after the letter 'H'
186	1	if _is_vowel(pos + 1):
187	1	meta_key += word[pos + 1]
188	1	pos += 2
189		else:
190	1	meta_key += 'H'
191	1	pos += 1
192	1	elif current_char == 'Q':
193	1	if word[pos + 1 : pos + 2] == 'U':
194	1	pos += 2
195		else:
196	1	pos += 1
197	1	meta_key += 'K'
198	1	elif current_char == 'W':
199	1	meta_key += 'U'
200	1	pos += 1
201	1	elif current_char == 'R':
202	1	meta_key += 'R'
203	1	pos += 1
204	1	elif current_char == 'S':
205	1	if not _is_vowel(pos + 1) and pos == 0:
206	1	meta_key += 'ES'
207	1	pos += 1
208		else:
209	1	meta_key += 'S'
210	1	pos += 1
211	1	elif current_char == 'Z':
212	1	meta_key += 'Z'
213	1	pos += 1
214	1	elif current_char == 'X':
215	1	if (
216		len(word) > 1
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
217		and pos == 0
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
218		and not _is_vowel(pos + 1)
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
219		):
220	1	meta_key += 'EX'
221	1	pos += 1
222		else:
223	1	meta_key += 'X'
224	1	pos += 1
225		else:
226	1	pos += 1
227
228		# Final change from S to Z in modified version
229	1	if modified:
230	1	meta_key = meta_key.replace('S', 'Z')
231
232	1	return meta_key
233
234
235	1	def spanish_metaphone(word, max_length=6, modified=False):
236		"""Return the Spanish Metaphone of a word.
237
238		This is a wrapper for :py:meth:`SpanishMetaphone.encode`.
239
240		Parameters
241		----------
242		word : str
243		The word to transform
244		max_length : int
245		The length of the code returned (defaults to 6)
246		modified : bool
247		Set to True to use del Pilar Angeles & Bailón-Miguel's modified version
248		of the algorithm
249
250		Returns
251		-------
252		str
253		The Spanish Metaphone code
254
255		Examples
256		--------
257		>>> spanish_metaphone('Perez')
258		'PRZ'
259		>>> spanish_metaphone('Martinez')
260		'MRTNZ'
261		>>> spanish_metaphone('Gutierrez')
262		'GTRRZ'
263		>>> spanish_metaphone('Santiago')
264		'SNTG'
265		>>> spanish_metaphone('Nicolás')
266		'NKLS'
267
268		"""
269	1	return SpanishMetaphone().encode(word, max_length, modified)
270
271
272		if __name__ == '__main__':
273		import doctest
274
275		doctest.testmod()
276

chrislit / abydos

Push — master ( f43547...71985b )

abydos.phonetic._spanish_metaphone A

Complexity

Size/Duplication

Test Coverage

Importance

1 Function

1 Method

Duplication Side-by-Side

Filter issues like