Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._soundex_br.soundex_br()   A

Complexity

Conditions 1

Size

Total Lines 36
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 3
dl 0
loc 36
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._soundex_br.
20
21
SoundexBR
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._phonetic import _Phonetic
36
37 1
__all__ = ['SoundexBR', 'soundex_br']
38
39
40 1
class SoundexBR(_Phonetic):
    """SoundexBR phonetic encoder.

    The algorithm follows :cite:`Marcelino:2015`.
    """

    # Translation table for ``str.translate``: maps the code point of each
    # uppercase ASCII letter to its one-character digit code. Letters coded
    # '0' are removed from the result later, in ``encode``.
    _trans = {
        ord(letter): digit
        for letter, digit in zip(
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '01230120022455012623010202'
        )
    }

    # NOTE(review): the analysis report attached to this file flags this
    # signature as differing from the overridden ``encode`` method; the base
    # class is not visible here -- confirm against ``_Phonetic``.
    def encode(self, word, max_length=4, zero_pad=True):
        """Return the SoundexBR encoding of a word.

        The word is uppercased and NFKD-normalized, and only characters
        found in ``self._uc_set`` are kept. The leading letter is then
        rewritten according to a small set of rules, the remaining letters
        are translated to digits, consecutive repeats are collapsed, and all
        '0' digits are dropped.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The length of the code returned (defaults to 4)
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string

        Returns
        -------
        str
            The SoundexBR code

        Examples
        --------
        >>> soundex_br('Oliveira')
        'O416'
        >>> soundex_br('Almeida')
        'A453'
        >>> soundex_br('Barbosa')
        'B612'
        >>> soundex_br('Araújo')
        'A620'
        >>> soundex_br('Gonçalves')
        'G524'
        >>> soundex_br('Goncalves')
        'G524'

        """
        # NFKD splits accented letters into base letter + combining mark, so
        # the filter below can discard the marks and keep the base letters.
        letters = unicode_normalize('NFKD', text_type(word.upper()))
        letters = ''.join([ch for ch in letters if ch in self._uc_set])

        lead = letters[:1]
        after = letters[1:2]

        # Choose the initial character of the code. The rules are keyed on
        # distinct leading letters, so at most one of them can apply.
        if lead == 'H':
            # A leading H is dropped: the following letter becomes the
            # initial and the rest of the word shifts left by one.
            letters = letters[1:]
            initial = after
        elif lead == 'Y':
            initial = 'I'
        elif lead == 'W' and after == 'A':
            initial = 'V'
        elif lead == 'K' and after in ('A', 'O', 'U'):
            initial = 'C'
        elif lead == 'C' and after in ('E', 'I'):
            initial = 'S'
        elif lead == 'G' and after in ('E', 'I'):
            initial = 'J'
        else:
            initial = lead

        # Everything after the first remaining letter is converted to digits.
        code = initial + letters[1:].translate(self._trans)
        code = self._delete_consecutive_repeats(code).replace('0', '')

        if zero_pad:
            # Over-pad, then let the final slice trim to max_length.
            code = code + '0' * max_length

        return code[:max_length]
114
115
116 1
def soundex_br(word, max_length=4, zero_pad=True):
    """Return the SoundexBR encoding of a word.

    Convenience wrapper that builds a :py:class:`SoundexBR` instance and
    delegates to :py:meth:`SoundexBR.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The length of the code returned (defaults to 4)
    zero_pad : bool
        Pad the end of the return value with 0s to achieve a max_length string

    Returns
    -------
    str
        The SoundexBR code

    Examples
    --------
    >>> soundex_br('Oliveira')
    'O416'
    >>> soundex_br('Almeida')
    'A453'
    >>> soundex_br('Barbosa')
    'B612'
    >>> soundex_br('Araújo')
    'A620'
    >>> soundex_br('Gonçalves')
    'G524'
    >>> soundex_br('Goncalves')
    'G524'

    """
    encoder = SoundexBR()
    return encoder.encode(word, max_length, zero_pad)
152
153
154
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()
158