Completed
Pull Request — master (#141)
by Chris
13:24
created

abydos.phonetic._phonem   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 154
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 49
dl 0
loc 154
ccs 18
cts 18
cp 1
rs 10
c 0
b 0
f 0
wmc 3

1 Method

Rating   Name   Duplication   Size   Complexity  
A Phonem.encode() 0 35 2

1 Function

Rating   Name   Duplication   Size   Complexity  
A phonem() 0 28 1
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._phonem.
20
21
Phonem
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._phonetic import Phonetic
36
37 1
# Names exported by ``from ... import *``: the encoder class and the
# module-level wrapper function defined in this file.
__all__ = ['Phonem', 'phonem']
38
39
40 1
class Phonem(Phonetic):
    """Phonem phonetic encoder.

    The algorithm is defined in :cite:`Wilde:1988`.

    This implementation follows the Perl version documented at
    :cite:`Wilz:2005` and also includes some enhancements presented in the
    Java port at :cite:`dcm4che:2011`.

    Phonem is intended chiefly for German names/words.
    """

    # Letter sequences that encode() rewrites, in this order, before the
    # per-character translation step.
    _substitutions = (
        ('SC', 'C'),
        ('SZ', 'C'),
        ('CZ', 'C'),
        ('TZ', 'C'),
        ('TS', 'C'),
        ('KS', 'X'),
        ('PF', 'V'),
        ('QU', 'KW'),
        ('PH', 'V'),
        ('UE', 'Y'),
        ('AE', 'E'),
        ('OE', 'Ö'),
        ('EI', 'AY'),
        ('EY', 'AY'),
        ('EU', 'OY'),
        ('AU', 'A§'),
        ('OU', '§'),
    )

    # Translation table for ``translate()``: code point of each source
    # character -> its single-character replacement (paired by position).
    _trans = {
        ord(source): target
        for source, target in zip(
            'ZKGQÇÑßFWPTÁÀÂÃÅÄÆÉÈÊËIJÌÍÎÏÜݧÚÙÛÔÒÓÕØ',
            'CCCCCNSVVBDAAAAAEEEEEEYYYYYYYYUUUUOOOOÖ',
        )
    }

    # Characters allowed in the final code; anything else is dropped.
    _uc_set = set('ABCDLMNORSUVWXYÖ')

    def encode(self, word):
        """Return the Phonem code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Phonem value

        Examples
        --------
        >>> pe = Phonem()
        >>> pe.encode('Christopher')
        'CRYSDOVR'
        >>> pe.encode('Niall')
        'NYAL'
        >>> pe.encode('Smith')
        'SMYD'
        >>> pe.encode('Schmidt')
        'CMYD'

        """
        # Upper-case first, then normalize to NFC so the characters match
        # the single-code-point entries of the tables above.
        code = unicode_normalize('NFC', text_type(word.upper()))

        # Multi-letter rewrites run before the single-character mapping.
        for sequence, replacement in self._substitutions:
            code = code.replace(sequence, replacement)

        collapsed = self._delete_consecutive_repeats(
            code.translate(self._trans)
        )

        # Keep only the characters that belong to the output alphabet.
        kept = [char for char in collapsed if char in self._uc_set]
        return ''.join(kept)
118
119
120 1
def phonem(word):
    """Return the Phonem code for a word.

    Convenience wrapper that builds a :py:class:`Phonem` instance and
    delegates to :py:meth:`Phonem.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Phonem value

    Examples
    --------
    >>> phonem('Christopher')
    'CRYSDOVR'
    >>> phonem('Niall')
    'NYAL'
    >>> phonem('Smith')
    'SMYD'
    >>> phonem('Schmidt')
    'CMYD'

    """
    encoder = Phonem()
    return encoder.encode(word)
148
149
150
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()
154