Completed
Branch master (78a222)
by Chris
14:36
created

abydos.phonetic._pt   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 135
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 11
eloc 66
dl 0
loc 135
ccs 30
cts 30
cp 1
rs 10
c 0
b 0
f 0

1 Function

Rating   Name   Duplication   Size   Complexity  
C soundex_br() 0 91 11
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._pt.
20
21
The phonetic._pt module implements phonetic algorithms intended for Portuguese,
22
including:
23
24
    - SoundexBR
25
"""
26
27 1
from __future__ import unicode_literals
28
29 1
from unicodedata import normalize as unicode_normalize
30
31 1
from six import text_type
32
33 1
from ._util import _delete_consecutive_repeats
34
35 1
__all__ = ['soundex_br']
36
37
38 1
def soundex_br(word, max_length=4, zero_pad=True):
    """Return the SoundexBR encoding of a word.

    This is based on :cite:`Marcelino:2015`.

    The word is upper-cased and NFKD-normalized, everything except the
    letters A-Z is discarded, the leading letter is respelled according to a
    few fixed rules, and the remaining letters are mapped to digits. Runs of
    identical symbols are then collapsed and all '0' digits are removed.

    :param str word: the word to transform
    :param int max_length: the length of the code returned (defaults to 4)
    :param bool zero_pad: pad the end of the return value with 0s to achieve a
        max_length string
    :returns: the SoundexBR code
    :rtype: str

    >>> soundex_br('Oliveira')
    'O416'
    >>> soundex_br('Almeida')
    'A453'
    >>> soundex_br('Barbosa')
    'B612'
    >>> soundex_br('Araújo')
    'A620'
    >>> soundex_br('Gonçalves')
    'G524'
    >>> soundex_br('Goncalves')
    'G524'
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    letters = frozenset(alphabet)
    # Code point of each letter -> its digit, in the form str.translate wants.
    digit_for = {
        ord(letter): digit
        for letter, digit in zip(alphabet, '01230120022455012623010202')
    }
    # (leading letter, admissible second letters, replacement leading letter)
    respellings = (
        ('W', {'A'}, 'V'),
        ('K', {'A', 'O', 'U'}, 'C'),
        ('C', {'I', 'E'}, 'S'),
        ('G', {'E', 'I'}, 'J'),
    )

    # NFKD decomposition separates base letters from their combining marks,
    # so the A-Z filter below keeps the base letter and drops the mark.
    decomposed = unicode_normalize('NFKD', text_type(word.upper()))
    word = ''.join(char for char in decomposed if char in letters)

    head, follower = word[:1], word[1:2]
    if head == 'H':
        # A leading H is skipped: the following letter becomes the first
        # symbol and the word is shifted left by one.
        first = follower
        word = word[1:]
    elif head == 'Y':
        first = 'I'
    else:
        # Take the first matching respelling, else keep the letter unchanged.
        first = next(
            (
                replacement
                for initial, seconds, replacement in respellings
                if head == initial and follower in seconds
            ),
            head,
        )

    code = first + word[1:].translate(digit_for)
    code = _delete_consecutive_repeats(code)
    code = code.replace('0', '')

    if zero_pad:
        # Over-pad, then let the final slice trim to max_length.
        code += '0' * max_length

    return code[:max_length]
129
130
131
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()
135