Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.phonetic._norphone.norphone()   A

Complexity

Conditions 1

Size

Total Lines 30
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 30
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._norphone.
20
21
Norphone
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31
32 1
from ._phonetic import _Phonetic
33
34 1
__all__ = ['Norphone', 'norphone']
35
36
37 1
class Norphone(_Phonetic):
    """Norphone phonetic encoder.

    Lars Marius Garshol's reference implementation can be found in
    :cite:`Garshol:2015`.

    The algorithm targets Norwegian; this implementation additionally
    recognizes Swedish vowels, and it includes the rules marked
    "not implemented" in the rule set of the file cited above.
    """

    # Upper-case vowels, including the Norwegian and Swedish ones.
    _uc_v_set = {'A', 'E', 'I', 'O', 'U', 'Y', 'Å', 'Æ', 'Ø', 'Ä', 'Ö'}

    # Substring rewrites grouped by substring length. ``encode`` tries the
    # longest lengths first, so longer matches win over shorter ones.
    _replacements = {
        4: {'SKEI': 'X'},
        3: {'SKJ': 'X', 'KEI': 'X'},
        2: {
            'CH': 'K',
            'CK': 'K',
            'GJ': 'J',
            'GH': 'K',
            'HG': 'K',
            'HJ': 'J',
            'HL': 'L',
            'HR': 'R',
            'KJ': 'X',
            'KI': 'X',
            'LD': 'L',
            'ND': 'N',
            'PH': 'F',
            'TH': 'T',
            'SJ': 'X',
        },
        1: {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'},
    }

    def encode(self, word):
        """Encode a word with Norphone.

        Parameters
        ----------
        word : str
            The word to encode

        Returns
        -------
        str
            The Norphone code of the word

        Examples
        --------
        >>> pe = Norphone()
        >>> pe.encode('Hansen')
        'HNSN'
        >>> pe.encode('Larsen')
        'LRSN'
        >>> pe.encode('Aagaard')
        'ÅKRT'
        >>> pe.encode('Braaten')
        'BRTN'
        >>> pe.encode('Sandvik')
        'SNVK'

        """
        word = word.upper()

        # Word-initial special cases as (prefix, emitted code) pairs; the
        # first matching prefix is consumed and the rest are not examined.
        initial_rules = (
            ('AA', 'Å'),
            ('GI', 'J'),
            ('SKY', 'X'),
            ('EI', 'Æ'),
            ('KY', 'X'),
            ('C', 'K'),
            ('Ä', 'Æ'),
            ('Ö', 'Ø'),
        )

        pieces = []
        pending = 0  # number of upcoming characters already consumed
        for prefix, symbol in initial_rules:
            if word[: len(prefix)] == prefix:
                pieces.append(symbol)
                pending = len(prefix)
                break

        # Word-final adjustments are applied after the prefix has been
        # examined, so the prefix decision is based on the unmodified word.
        if word[-2:] == 'DT':
            word = word[:-2] + 'T'
        # Though the rules indicate this rule applies in all positions, the
        # reference implementation indicates it applies only in final position.
        elif word[-1:] == 'D' and word[-2:-1] in self._uc_v_set:
            word = word[:-2]

        # Longest substrings are tried first at every position.
        lengths = sorted(self._replacements, reverse=True)

        for pos, char in enumerate(word):
            if pending:
                pending -= 1
                continue

            for length in lengths:
                table = self._replacements[length]
                chunk = word[pos : pos + length]
                if chunk in table:
                    pieces.append(table[chunk])
                    # The remaining characters of the match are consumed.
                    pending = length - 1
                    break
            else:
                # No rewrite applies: keep the character unless it is a
                # vowel that is not in word-initial position.
                if pos == 0 or char not in self._uc_v_set:
                    pieces.append(char)

        return self._delete_consecutive_repeats(''.join(pieces))
156
157
158 1
def norphone(word):
    """Encode a word with Norphone.

    Convenience wrapper that builds a :py:class:`Norphone` instance and
    delegates to :py:meth:`Norphone.encode`.

    Parameters
    ----------
    word : str
        The word to encode

    Returns
    -------
    str
        The Norphone code of the word

    Examples
    --------
    >>> norphone('Hansen')
    'HNSN'
    >>> norphone('Larsen')
    'LRSN'
    >>> norphone('Aagaard')
    'ÅKRT'
    >>> norphone('Braaten')
    'BRTN'
    >>> norphone('Sandvik')
    'SNVK'

    """
    encoder = Norphone()
    return encoder.encode(word)
188
189
190
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()
194