abydos.phonetic._SpanishMetaphone.spanish_metaphone() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

spanish_metaphone() A

↳ Parent: abydos.phonetic._SpanishMetaphone

Complexity

Conditions

Size

Total Lines	28
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	3
dl	0
loc	28
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-


# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._SpanishMetaphone.

The phonetic._SpanishMetaphone module implements the Spanish Metaphone
phonetic algorithm, in both its original and modified variants.
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type

from ._Phonetic import Phonetic

__all__ = ['SpanishMetaphone', 'spanish_metaphone']


class SpanishMetaphone(Phonetic):

    """Spanish Metaphone.

    This is a quick rewrite of the Spanish Metaphone Algorithm, as presented at
    https://github.com/amsqr/Spanish-Metaphone and discussed in
    :cite:`Mosquera:2012`.

    Modified version based on :cite:`delPilarAngeles:2016`.
    """

    def encode(self, word, max_length=6, modified=False):

        """Return the Spanish Metaphone of a word.

        The word is upper-cased and NFC-normalized, a fixed sequence of
        substring replacements is applied (accent stripping, digraph
        folding), and the result is scanned left to right. A vowel is only
        copied to the code when it is the first character of the word or
        when it directly follows an 'H'; every other vowel and every
        character without a rule is skipped.

        Args:
            word (str): The word to transform
            max_length (int): The length of the code returned (defaults to 6)
            modified (bool): Set to True to use del Pilar Angeles &
                Bailón-Miguel's modified version of the algorithm

        Returns:
            str: The Spanish Metaphone code, at most max_length characters
                long (empty if max_length is not positive)

        Examples:
            >>> pe = SpanishMetaphone()
            >>> pe.encode('Perez')
            'PRZ'
            >>> pe.encode('Martinez')
            'MRTNZ'
            >>> pe.encode('Gutierrez')
            'GTRRZ'
            >>> pe.encode('Santiago')
            'SNTG'
            >>> pe.encode('Nicolás')
            'NKLS'
            >>> pe.encode('Stop', max_length=1)
            'E'

        """

        def _is_vowel(pos):
            """Return True if the character at word[pos] is a vowel.

            Args:
                pos (int): Position to check for a vowel

            Returns:
                bool: True if word[pos] is a vowel (False when pos is past
                    the end of the word)

            """
            return pos < len(word) and word[pos] in {'A', 'E', 'I', 'O', 'U'}

        word = unicode_normalize('NFC', text_type(word.upper()))

        # do some replacements for the modified version; these must run
        # before the simple replacements below, which turn every 'B' into 'V'
        if modified:
            word = word.replace('MB', 'NB')
            word = word.replace('MP', 'NP')
            word = word.replace('BS', 'S')
            if word[:2] == 'PS':
                word = word[1:]

        # simple replacements, applied strictly in this order
        # ('GÜ' has to be handled before the bare 'Ü')
        for old, new in (
            ('Á', 'A'),
            ('CH', 'X'),
            ('Ç', 'S'),
            ('É', 'E'),
            ('Í', 'I'),
            ('Ó', 'O'),
            ('Ú', 'U'),
            ('Ñ', 'NY'),
            ('GÜ', 'W'),
            ('Ü', 'U'),
            ('B', 'V'),
            ('LL', 'Y'),
        ):
            word = word.replace(old, new)

        meta_key = ''
        pos = 0

        while len(meta_key) < max_length and pos < len(word):
            current_char = word[pos]
            # the following character, or '' at the end of the word
            next_char = word[pos + 1 : pos + 2]

            # if a vowel in pos 0, add to key
            if pos == 0 and _is_vowel(pos):
                meta_key += current_char
                pos += 1
            # simple consonants (unmutated)
            elif current_char in {
                'D',
                'F',
                'J',
                'K',
                'M',
                'N',
                'P',
                'T',
                'V',
                'L',
                'Y',
            }:
                meta_key += current_char
                # skip doubled consonants
                pos += 2 if next_char == current_char else 1
            elif current_char == 'C':
                # special case 'acción', 'reacción',etc.
                if next_char == 'C':
                    meta_key += 'X'
                    pos += 2
                # special case 'cesar', 'cien', 'cid', 'conciencia'
                elif next_char in {'E', 'I'}:
                    meta_key += 'Z'
                    pos += 2
                # base case
                else:
                    meta_key += 'K'
                    pos += 1
            elif current_char == 'G':
                # special case 'gente', 'ecologia',etc
                if next_char in {'E', 'I'}:
                    meta_key += 'J'
                    pos += 2
                # base case
                else:
                    meta_key += 'G'
                    pos += 1
            elif current_char == 'H':
                # since the letter 'H' is silent in Spanish,
                # set the meta key to the vowel after the letter 'H'
                if _is_vowel(pos + 1):
                    meta_key += next_char
                    pos += 2
                else:
                    meta_key += 'H'
                    pos += 1
            elif current_char == 'Q':
                # 'QU' is consumed as a unit; either way the code is 'K'
                meta_key += 'K'
                pos += 2 if next_char == 'U' else 1
            elif current_char == 'W':
                meta_key += 'U'
                pos += 1
            elif current_char in {'R', 'Z'}:
                # copied as-is; unlike the simple consonants above,
                # doubled letters are NOT collapsed here
                meta_key += current_char
                pos += 1
            elif current_char == 'S':
                # word-initial 'S' not followed by a vowel gets an 'E' prefix
                if pos == 0 and not _is_vowel(pos + 1):
                    meta_key += 'ES'
                else:
                    meta_key += 'S'
                pos += 1
            elif current_char == 'X':
                # word-initial 'X' followed by a non-vowel gets an 'E' prefix
                # (a lone 'X' is left as 'X')
                if len(word) > 1 and pos == 0 and not _is_vowel(pos + 1):
                    meta_key += 'EX'
                else:
                    meta_key += 'X'
                pos += 1
            else:
                # non-initial vowels and any character without a rule
                pos += 1

        # Final change from S to Z in modified version
        if modified:
            meta_key = meta_key.replace('S', 'Z')

        # The two-character 'ES'/'EX' prefixes can overshoot a max_length
        # of 1; trim so the result never exceeds the requested length.
        if len(meta_key) > max_length:
            meta_key = meta_key[: int(max_length)]

        return meta_key


def spanish_metaphone(word, max_length=6, modified=False):
    """Compute the Spanish Metaphone code of a word.

    Convenience function: it instantiates :py:class:`SpanishMetaphone` and
    delegates to :py:meth:`SpanishMetaphone.encode` with the same arguments.

    Args:
        word (str): The word to transform
        max_length (int): The length of the code returned (defaults to 6)
        modified (bool): Set to True to use del Pilar Angeles &
            Bailón-Miguel's modified version of the algorithm

    Returns:
        str: The Spanish Metaphone code

    Examples:
        >>> spanish_metaphone('Perez')
        'PRZ'
        >>> spanish_metaphone('Martinez')
        'MRTNZ'
        >>> spanish_metaphone('Gutierrez')
        'GTRRZ'
        >>> spanish_metaphone('Santiago')
        'SNTG'
        >>> spanish_metaphone('Nicolás')
        'NKLS'

    """
    encoder = SpanishMetaphone()
    return encoder.encode(word, max_length=max_length, modified=modified)


if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the
    # docstrings of this module.
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
		0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_SpanishMetaphone` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)\|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._es.
20
21		The phonetic._es module implements phonetic algorithms intended for Spanish,
22		including:
23
24		- Phonetic Spanish
25		- Spanish Metaphone
26		"""
27
28	1	from __future__ import (
29		absolute_import,
30		division,
31		print_function,
32		unicode_literals,
33		)
34
35	1	from unicodedata import normalize as unicode_normalize
36
37	1	from six import text_type
38
39	1	from ._Phonetic import Phonetic
40
41	1	__all__ = ['SpanishMetaphone', 'spanish_metaphone']
42
43
44	1	class SpanishMetaphone(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
45		"""Spanish Metaphone.
46
47		This is a quick rewrite of the Spanish Metaphone Algorithm, as presented at
48		https://github.com/amsqr/Spanish-Metaphone and discussed in
49		:cite:`Mosquera:2012`.
50
51		Modified version based on :cite:`delPilarAngeles:2016`.
52		"""
53
54	1	def encode(self, word, max_length=6, modified=False):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
55		"""Return the Spanish Metaphone of a word.
56
57		Args:
58		word (str): The word to transform
59		max_length (int): The length of the code returned (defaults to 6)
60		modified (bool): Set to True to use del Pilar Angeles &
61		Bailón-Miguel's modified version of the algorithm
62
63		Returns:
64		str: The Spanish Metaphone code
65
66		Examples:
67		>>> pe = SpanishMetaphone()
68		>>> pe.encode('Perez')
69		'PRZ'
70		>>> pe.encode('Martinez')
71		'MRTNZ'
72		>>> pe.encode('Gutierrez')
73		'GTRRZ'
74		>>> pe.encode('Santiago')
75		'SNTG'
76		>>> pe.encode('Nicolás')
77		'NKLS'
78
79		"""
80
81	1	def _is_vowel(pos):
82		"""Return True if the character at word[pos] is a vowel.
83
84		Args:
85		pos (int): Position to check for a vowel
86
87		Returns:
88		bool: True if word[pos] is a vowel
89
90		"""
91	1	return pos < len(word) and word[pos] in {'A', 'E', 'I', 'O', 'U'}
92
93	1	word = unicode_normalize('NFC', text_type(word.upper()))
94
95	1	meta_key = ''
96	1	pos = 0
97
98		# do some replacements for the modified version
99	1	if modified:
100	1	word = word.replace('MB', 'NB')
101	1	word = word.replace('MP', 'NP')
102	1	word = word.replace('BS', 'S')
103	1	if word[:2] == 'PS':
104	1	word = word[1:]
105
106		# simple replacements
107	1	word = word.replace('Á', 'A')
108	1	word = word.replace('CH', 'X')
109	1	word = word.replace('Ç', 'S')
110	1	word = word.replace('É', 'E')
111	1	word = word.replace('Í', 'I')
112	1	word = word.replace('Ó', 'O')
113	1	word = word.replace('Ú', 'U')
114	1	word = word.replace('Ñ', 'NY')
115	1	word = word.replace('GÜ', 'W')
116	1	word = word.replace('Ü', 'U')
117	1	word = word.replace('B', 'V')
118	1	word = word.replace('LL', 'Y')
119
120	1	while len(meta_key) < max_length:
121	1	if pos >= len(word):
122	1	break
123
124		# get the next character
125	1	current_char = word[pos]
126
127		# if a vowel in pos 0, add to key
128	1	if _is_vowel(pos) and pos == 0:
129	1	meta_key += current_char
130	1	pos += 1
131		# otherwise, do consonant rules
132		else:
133		# simple consonants (unmutated)
134	1	if current_char in {
135		'D',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
136		'F',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
137		'J',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
138		'K',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
139		'M',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
140		'N',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
141		'P',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
142		'T',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
143		'V',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
144		'L',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
145		'Y',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
146		}:
147	1	meta_key += current_char
148		# skip doubled consonants
149	1	if word[pos + 1 : pos + 2] == current_char:
150	1	pos += 2
151		else:
152	1	pos += 1
153		else:
154	1	if current_char == 'C':
155		# special case 'acción', 'reacción',etc.
156	1	if word[pos + 1 : pos + 2] == 'C':
157	1	meta_key += 'X'
158	1	pos += 2
159		# special case 'cesar', 'cien', 'cid', 'conciencia'
160	1	elif word[pos + 1 : pos + 2] in {'E', 'I'}:
161	1	meta_key += 'Z'
162	1	pos += 2
163		# base case
164		else:
165	1	meta_key += 'K'
166	1	pos += 1
167	1	elif current_char == 'G':
168		# special case 'gente', 'ecologia',etc
169	1	if word[pos + 1 : pos + 2] in {'E', 'I'}:
170	1	meta_key += 'J'
171	1	pos += 2
172		# base case
173		else:
174	1	meta_key += 'G'
175	1	pos += 1
176	1	elif current_char == 'H':
177		# since the letter 'H' is silent in Spanish,
178		# set the meta key to the vowel after the letter 'H'
179	1	if _is_vowel(pos + 1):
180	1	meta_key += word[pos + 1]
181	1	pos += 2
182		else:
183	1	meta_key += 'H'
184	1	pos += 1
185	1	elif current_char == 'Q':
186	1	if word[pos + 1 : pos + 2] == 'U':
187	1	pos += 2
188		else:
189	1	pos += 1
190	1	meta_key += 'K'
191	1	elif current_char == 'W':
192	1	meta_key += 'U'
193	1	pos += 1
194	1	elif current_char == 'R':
195	1	meta_key += 'R'
196	1	pos += 1
197	1	elif current_char == 'S':
198	1	if not _is_vowel(pos + 1) and pos == 0:
199	1	meta_key += 'ES'
200	1	pos += 1
201		else:
202	1	meta_key += 'S'
203	1	pos += 1
204	1	elif current_char == 'Z':
205	1	meta_key += 'Z'
206	1	pos += 1
207	1	elif current_char == 'X':
208	1	if (
209		len(word) > 1
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
210		and pos == 0
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
211		and not _is_vowel(pos + 1)
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
212		):
213	1	meta_key += 'EX'
214	1	pos += 1
215		else:
216	1	meta_key += 'X'
217	1	pos += 1
218		else:
219	1	pos += 1
220
221		# Final change from S to Z in modified version
222	1	if modified:
223	1	meta_key = meta_key.replace('S', 'Z')
224
225	1	return meta_key
226
227
228	1	def spanish_metaphone(word, max_length=6, modified=False):
229		"""Return the Spanish Metaphone of a word.
230
231		This is a wrapper for :py:meth:`SpanishMetaphone.encode`.
232
233		Args:
234		word (str): The word to transform
235		max_length (int): The length of the code returned (defaults to 6)
236		modified (bool): Set to True to use del Pilar Angeles &
237		Bailón-Miguel's modified version of the algorithm
238
239		Returns:
240		str: The Spanish Metaphone code
241
242		Examples:
243		>>> spanish_metaphone('Perez')
244		'PRZ'
245		>>> spanish_metaphone('Martinez')
246		'MRTNZ'
247		>>> spanish_metaphone('Gutierrez')
248		'GTRRZ'
249		>>> spanish_metaphone('Santiago')
250		'SNTG'
251		>>> spanish_metaphone('Nicolás')
252		'NKLS'
253
254		"""
255	1	return SpanishMetaphone().encode(word, max_length, modified)
256
257
258		if __name__ == '__main__':
259		import doctest
260
261		doctest.testmod()
262

chrislit / abydos

Pull Request — master (#141)

spanish_metaphone() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like