Completed
Branch master (78a222)
by Chris
14:36
created

abydos.phonetic._sound_d   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 125
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 6
eloc 62
dl 0
loc 125
ccs 26
cts 26
cp 1
rs 10
c 0
b 0
f 0

1 Function

Rating   Name   Duplication   Size   Complexity  
B sound_d() 0 84 6
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._sound_d.
20
21
The phonetic._sound_d module implements the SoundD phonetic algorithm.
22
"""
23
24 1
from __future__ import unicode_literals
25
26 1
from unicodedata import normalize as unicode_normalize
27
28 1
from six import text_type
29
30 1
from ._util import _delete_consecutive_repeats
31
32 1
__all__ = ['sound_d']
33
34
35 1
def sound_d(word, max_length=4):
    """Encode a word with the SoundD phonetic algorithm.

    SoundD is defined in :cite:`Varol:2012`.

    The word is upper-cased, NFKD-normalized and reduced to the letters
    A-Z ('ß' is rewritten as 'SS' first). A leading 'KN', 'GN', 'PN', 'AC'
    or 'WR' loses its first letter, a leading 'X' becomes 'S' and a leading
    'WH' becomes 'W'. The clusters 'DGE'/'DGI' are coded as '20' and 'GH'
    as '0'; every remaining letter is mapped to a digit. The digit string
    is then passed through ``_delete_consecutive_repeats`` and all zeros
    are removed.

    :param str word: the word to transform
    :param int max_length: the length of the code returned (defaults to 4);
        the code is zero-padded or truncated to this length, and -1 returns
        it without padding or truncation
    :returns: the SoundD code
    :rtype: str

    >>> sound_d('Gough')
    '2000'
    >>> sound_d('pneuma')
    '5500'
    >>> sound_d('knight')
    '5300'
    >>> sound_d('trice')
    '3620'
    >>> sound_d('judge')
    '2200'
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '01230120022455012623010202'
    # translate() on a text string wants code points as keys.
    letter_codes = {ord(ch): digit for ch, digit in zip(alphabet, digits)}
    allowed = frozenset(alphabet)

    upper = text_type(word.upper())
    normalized = unicode_normalize('NFKD', upper).replace('ß', 'SS')
    # Drop everything that is not a plain A-Z letter (combining marks left
    # behind by the NFKD decomposition, digits, punctuation, ...).
    letters = ''.join(char for char in normalized if char in allowed)

    # Word-initial special cases; at most one of them applies.
    if letters.startswith(('KN', 'GN', 'PN', 'AC', 'WR')):
        letters = letters[1:]
    elif letters.startswith('X'):
        letters = 'S' + letters[1:]
    elif letters.startswith('WH'):
        letters = 'W' + letters[2:]

    # Multi-letter clusters are coded before the per-letter mapping; the
    # digits they introduce are not keys of the table and pass through it.
    for cluster, replacement in (('DGE', '20'), ('DGI', '20'), ('GH', '0')):
        letters = letters.replace(cluster, replacement)

    code = _delete_consecutive_repeats(letters.translate(letter_codes))
    # Zeros are removed only after repeats were handled, so they still
    # separate equal digits during that step.
    code = code.replace('0', '')

    if max_length == -1:
        return code
    # Truncate to max_length, then right-pad with zeros up to max_length.
    return code[:max_length].ljust(max_length, '0')
119
120
121
if __name__ == '__main__':
    # Executed directly: run the docstring examples of this module as
    # doctests.
    from doctest import testmod

    testmod()
125