abydos.phonetic._soundex_br.SoundexBR.encode() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-17 06:08 UTC

abydos.phonetic._soundex_br.SoundexBR.encode() C

↳ Parent: abydos.phonetic._soundex_br

Complexity

Conditions

Size

Total Lines	61
Code Lines	23

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	23
CRAP Score	11

Importance

Changes

Metric	Value
cc	11
eloc	23
nop	4
dl	0
loc	61
ccs	23
cts	23
cp	1
crap	11
rs	5.4
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._soundex_br.

SoundexBR
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type

from ._phonetic import _Phonetic

# Public names of this module (what ``from <module> import *`` exports):
# the encoder class and its function-style wrapper.
__all__ = ['SoundexBR', 'soundex_br']


class SoundexBR(_Phonetic):
    """SoundexBR phonetic encoder.

    Produces a Soundex-style code: an (adjusted) first letter followed by
    digit classes for the remaining letters.

    This is based on :cite:`Marcelino:2015`.
    """

    # Table for ``translate``: code point of each uppercase letter mapped to
    # its digit class. Vowels, H, W and Y map to '0', which ``encode``
    # strips from the final code.
    _trans = {
        ord(letter): digit
        for letter, digit in zip(
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
            '01230120022455012623010202',
        )
    }

    def encode(self, word, max_length=4, zero_pad=True):
        """Return the SoundexBR encoding of a word.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The maximum length of the code returned (defaults to 4)
        zero_pad : bool
            If true, pad the end of the code with 0s up to max_length;
            otherwise the code may be shorter than max_length

        Returns
        -------
        str
            The SoundexBR code

        Examples
        --------
        >>> pe = SoundexBR()
        >>> pe.encode('Oliveira')
        'O416'
        >>> pe.encode('Almeida')
        'A453'
        >>> pe.encode('Barbosa')
        'B612'
        >>> pe.encode('Araújo')
        'A620'
        >>> pe.encode('Gonçalves')
        'G524'
        >>> pe.encode('Goncalves')
        'G524'

        """
        # NFKD splits accented letters into base letter + combining mark;
        # only characters present in the inherited _uc_set are kept
        # (presumably the uppercase letters -- defined on the base class).
        decomposed = unicode_normalize('NFKD', text_type(word.upper()))
        word = ''.join(ch for ch in decomposed if ch in self._uc_set)

        # Choose the leading character of the code from the first one or
        # two letters. The branches are mutually exclusive on `lead`.
        lead, second = word[:1], word[1:2]
        if lead == 'H':
            # Leading H is dropped: the code starts at the next letter.
            word = word[1:]
            first = second
        elif lead == 'Y':
            first = 'I'
        elif lead == 'W' and second == 'A':
            first = 'V'
        elif lead == 'K' and second in ('A', 'O', 'U'):
            first = 'C'
        elif lead == 'C' and second in ('E', 'I'):
            first = 'S'
        elif lead == 'G' and second in ('E', 'I'):
            first = 'J'
        else:
            first = lead

        # Everything after the first letter becomes digit classes.
        digits = word[1:].translate(self._trans)
        # Inherited helper; going by its name and the doctests it collapses
        # runs of identical adjacent characters. It runs before the '0's
        # are removed, so zero-coded letters still separate equal digits.
        code = self._delete_consecutive_repeats(first + digits)
        code = code.replace('0', '')

        padding = '0' * max_length if zero_pad else ''
        return (code + padding)[:max_length]


def soundex_br(word, max_length=4, zero_pad=True):
    """Return the SoundexBR encoding of a word.

    Convenience wrapper that creates a :py:class:`SoundexBR` instance and
    delegates to :py:meth:`SoundexBR.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The maximum length of the code returned (defaults to 4)
    zero_pad : bool
        If true, pad the end of the code with 0s up to max_length

    Returns
    -------
    str
        The SoundexBR code

    Examples
    --------
    >>> soundex_br('Oliveira')
    'O416'
    >>> soundex_br('Almeida')
    'A453'
    >>> soundex_br('Barbosa')
    'B612'
    >>> soundex_br('Araújo')
    'A620'
    >>> soundex_br('Gonçalves')
    'G524'
    >>> soundex_br('Goncalves')
    'G524'

    """
    encoder = SoundexBR()
    return encoder.encode(word, max_length=max_length, zero_pad=zero_pad)


if __name__ == '__main__':
    # When executed as a script, run the docstring examples as a self-test.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._soundex_br.
20
21		SoundexBR
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34
35	1	from ._phonetic import _Phonetic
36
37	1	__all__ = ['SoundexBR', 'soundex_br']
38
39
40	1	class SoundexBR(_Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""SoundexBR.
42
43		This is based on :cite:`Marcelino:2015`.
44		"""
45
46	1	_trans = dict(
47		zip(
48		(ord(_) for _ in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
		0 ignored issues – show Comprehensibility Best Practice introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
49		'01230120022455012623010202',
50		)
51		)
52
53	1	def encode(self, word, max_length=4, zero_pad=True):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
54		"""Return the SoundexBR encoding of a word.
55
56		Parameters
57		----------
58		word : str
59		The word to transform
60		max_length : int
61		The length of the code returned (defaults to 4)
62		zero_pad : bool
63		Pad the end of the return value with 0s to achieve a max_length
64		string
65
66		Returns
67		-------
68		str
69		The SoundexBR code
70
71		Examples
72		--------
73		>>> soundex_br('Oliveira')
74		'O416'
75		>>> soundex_br('Almeida')
76		'A453'
77		>>> soundex_br('Barbosa')
78		'B612'
79		>>> soundex_br('Araújo')
80		'A620'
81		>>> soundex_br('Gonçalves')
82		'G524'
83		>>> soundex_br('Goncalves')
84		'G524'
85
86		"""
87	1	word = unicode_normalize('NFKD', text_type(word.upper()))
88	1	word = ''.join(c for c in word if c in self._uc_set)
89
90	1	if word[:2] == 'WA':
91	1	first = 'V'
92	1	elif word[:1] == 'K' and word[1:2] in {'A', 'O', 'U'}:
93	1	first = 'C'
94	1	elif word[:1] == 'C' and word[1:2] in {'I', 'E'}:
95	1	first = 'S'
96	1	elif word[:1] == 'G' and word[1:2] in {'E', 'I'}:
97	1	first = 'J'
98	1	elif word[:1] == 'Y':
99	1	first = 'I'
100	1	elif word[:1] == 'H':
101	1	first = word[1:2]
102	1	word = word[1:]
103		else:
104	1	first = word[:1]
105
106	1	sdx = first + word[1:].translate(self._trans)
107	1	sdx = self._delete_consecutive_repeats(sdx)
108	1	sdx = sdx.replace('0', '')
109
110	1	if zero_pad:
111	1	sdx += '0' * max_length
112
113	1	return sdx[:max_length]
114
115
116	1	def soundex_br(word, max_length=4, zero_pad=True):
117		"""Return the SoundexBR encoding of a word.
118
119		This is a wrapper for :py:meth:`SoundexBR.encode`.
120
121		Parameters
122		----------
123		word : str
124		The word to transform
125		max_length : int
126		The length of the code returned (defaults to 4)
127		zero_pad : bool
128		Pad the end of the return value with 0s to achieve a max_length string
129
130		Returns
131		-------
132		str
133		The SoundexBR code
134
135		Examples
136		--------
137		>>> soundex_br('Oliveira')
138		'O416'
139		>>> soundex_br('Almeida')
140		'A453'
141		>>> soundex_br('Barbosa')
142		'B612'
143		>>> soundex_br('Araújo')
144		'A620'
145		>>> soundex_br('Gonçalves')
146		'G524'
147		>>> soundex_br('Goncalves')
148		'G524'
149
150		"""
151	1	return SoundexBR().encode(word, max_length, zero_pad)
152
153
154		if __name__ == '__main__':
155		import doctest
156
157		doctest.testmod()
158

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._soundex_br.SoundexBR.encode() C

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like