abydos.phonetic._norphone.norphone() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

abydos.phonetic._norphone.norphone() A

↳ Parent: abydos.phonetic._norphone

Complexity

Conditions

Size

Total Lines	30
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	1
dl	0
loc	30
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._norphone.

Norphone
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)


from ._phonetic import _Phonetic

__all__ = ['Norphone', 'norphone']


class Norphone(_Phonetic):

    """Norphone phonetic encoder.

    Lars Marius Garshol's reference implementation is cited as
    :cite:`Garshol:2015`.

    The algorithm targets Norwegian. This implementation additionally
    recognises the Swedish vowels Ä and Ö, and it incorporates the rules
    that the reference file's rule set lists as "not implemented".
    """

    # Upper-case vowels. encode() drops them everywhere except at the very
    # first position of the word.
    _uc_v_set = {'A', 'E', 'I', 'O', 'U', 'Y', 'Å', 'Æ', 'Ø', 'Ä', 'Ö'}

    # Substitution tables, keyed by the length of the substring they match.
    # encode() consults the longer substrings first.
    _replacements = {
        4: {'SKEI': 'X'},
        3: {'SKJ': 'X', 'KEI': 'X'},
        2: {
            'CH': 'K',
            'CK': 'K',
            'GJ': 'J',
            'GH': 'K',
            'HG': 'K',
            'HJ': 'J',
            'HL': 'L',
            'HR': 'R',
            'KJ': 'X',
            'KI': 'X',
            'LD': 'L',
            'ND': 'N',
            'PH': 'F',
            'TH': 'T',
            'SJ': 'X',
        },
        1: {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'},
    }

    def encode(self, word):
        """Compute the Norphone code of a word.

        The word is upper-cased, a word-initial rule and a word-final rule
        are applied if they match, the remainder is scanned left to right
        using the substitution tables (longest match first), and finally
        runs of identical code characters are collapsed by
        ``_delete_consecutive_repeats``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Norphone code

        Examples
        --------
        >>> pe = Norphone()
        >>> pe.encode('Hansen')
        'HNSN'
        >>> pe.encode('Larsen')
        'LRSN'
        >>> pe.encode('Aagaard')
        'ÅKRT'
        >>> pe.encode('Braaten')
        'BRTN'
        >>> pe.encode('Sandvik')
        'SNVK'

        """
        word = word.upper()

        # Word-initial rules as (prefix, emitted code). The first matching
        # entry wins and the scan below resumes right after the prefix.
        initial_rules = (
            ('AA', 'Å'),
            ('GI', 'J'),
            ('SKY', 'X'),
            ('EI', 'Æ'),
            ('KY', 'X'),
            ('C', 'K'),
            ('Ä', 'Æ'),
            ('Ö', 'Ø'),
        )

        pieces = []
        pos = 0
        for prefix, prefix_code in initial_rules:
            if word[: len(prefix)] == prefix:
                pieces.append(prefix_code)
                pos = len(prefix)
                break

        # Word-final rules. These shorten the word *after* the prefix has
        # been measured, so ``pos`` may already lie past the new end; the
        # scan below then simply does nothing.
        if word[-2:] == 'DT':
            word = word[:-2] + 'T'
        # Though the rules indicate this rule applies in all positions, the
        # reference implementation indicates it applies only in final position.
        elif word[-1:] == 'D' and word[-2:-1] in self._uc_v_set:
            word = word[:-2]

        # Longest substitution first.
        lengths = sorted(self._replacements, reverse=True)
        end = len(word)

        while pos < end:
            for length in lengths:
                table = self._replacements[length]
                chunk = word[pos : pos + length]
                if chunk in table:
                    pieces.append(table[chunk])
                    # Jump over every character consumed by this match.
                    pos += length
                    break
            else:
                # No substitution applies here: keep the character unless
                # it is a vowel somewhere other than the first position.
                char = word[pos]
                if pos == 0 or char not in self._uc_v_set:
                    pieces.append(char)
                pos += 1

        return self._delete_consecutive_repeats(''.join(pieces))


def norphone(word):
    """Compute the Norphone code of a word.

    Convenience wrapper that instantiates :py:class:`Norphone` and delegates
    to :py:meth:`Norphone.encode`.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The Norphone code

    Examples
    --------
    >>> norphone('Hansen')
    'HNSN'
    >>> norphone('Larsen')
    'LRSN'
    >>> norphone('Aagaard')
    'ÅKRT'
    >>> norphone('Braaten')
    'BRTN'
    >>> norphone('Sandvik')
    'SNVK'

    """
    encoder = Norphone()
    return encoder.encode(word)


if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the
    # docstrings of this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._norphone.
20
21		Norphone
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31
32	1	from ._phonetic import _Phonetic
33
34	1	__all__ = ['Norphone', 'norphone']
35
36
37	1	class Norphone(_Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
38		"""Norphone.
39
40		The reference implementation by Lars Marius Garshol is available in
41		:cite:`Garshol:2015`.
42
43		Norphone was designed for Norwegian, but this implementation has been
44		extended to support Swedish vowels as well. This function incorporates
45		the "not implemented" rules from the above file's rule set.
46		"""
47
48	1	_uc_v_set = {'A', 'E', 'I', 'O', 'U', 'Y', 'Å', 'Æ', 'Ø', 'Ä', 'Ö'}
49
50	1	_replacements = {
51		4: {'SKEI': 'X'},
52		3: {'SKJ': 'X', 'KEI': 'X'},
53		2: {
54		'CH': 'K',
55		'CK': 'K',
56		'GJ': 'J',
57		'GH': 'K',
58		'HG': 'K',
59		'HJ': 'J',
60		'HL': 'L',
61		'HR': 'R',
62		'KJ': 'X',
63		'KI': 'X',
64		'LD': 'L',
65		'ND': 'N',
66		'PH': 'F',
67		'TH': 'T',
68		'SJ': 'X',
69		},
70		1: {'W': 'V', 'X': 'KS', 'Z': 'S', 'D': 'T', 'G': 'K'},
71		}
72
73	1	def encode(self, word):
74		"""Return the Norphone code.
75
76		Parameters
77		----------
78		word : str
79		The word to transform
80
81		Returns
82		-------
83		str
84		The Norphone code
85
86		Examples
87		--------
88		>>> pe = Norphone()
89		>>> pe.encode('Hansen')
90		'HNSN'
91		>>> pe.encode('Larsen')
92		'LRSN'
93		>>> pe.encode('Aagaard')
94		'ÅKRT'
95		>>> pe.encode('Braaten')
96		'BRTN'
97		>>> pe.encode('Sandvik')
98		'SNVK'
99
100		"""
101	1	word = word.upper()
102
103	1	code = ''
104	1	skip = 0
105
106	1	if word[0:2] == 'AA':
107	1	code = 'Å'
108	1	skip = 2
109	1	elif word[0:2] == 'GI':
110	1	code = 'J'
111	1	skip = 2
112	1	elif word[0:3] == 'SKY':
113	1	code = 'X'
114	1	skip = 3
115	1	elif word[0:2] == 'EI':
116	1	code = 'Æ'
117	1	skip = 2
118	1	elif word[0:2] == 'KY':
119	1	code = 'X'
120	1	skip = 2
121	1	elif word[:1] == 'C':
122	1	code = 'K'
123	1	skip = 1
124	1	elif word[:1] == 'Ä':
125	1	code = 'Æ'
126	1	skip = 1
127	1	elif word[:1] == 'Ö':
128	1	code = 'Ø'
129	1	skip = 1
130
131	1	if word[-2:] == 'DT':
132	1	word = word[:-2] + 'T'
133		# Though the rules indicate this rule applies in all positions, the
134		# reference implementation indicates it applies only in final position.
135	1	elif word[-2:-1] in self._uc_v_set and word[-1:] == 'D':
136	1	word = word[:-2]
137
138	1	for pos, char in enumerate(word):
139	1	if skip:
140	1	skip -= 1
141		else:
142	1	for length in sorted(self._replacements, reverse=True):
143	1	if word[pos : pos + length] in self._replacements[length]:
144	1	code += self._replacements[length][
145		word[pos : pos + length]
146		]
147	1	skip = length - 1
148	1	break
149		else:
150	1	if not pos or char not in self._uc_v_set:
151	1	code += char
152
153	1	code = self._delete_consecutive_repeats(code)
154
155	1	return code
156
157
158	1	def norphone(word):
159		"""Return the Norphone code.
160
161		This is a wrapper for :py:meth:`Norphone.encode`.
162
163		Parameters
164		----------
165		word : str
166		The word to transform
167
168		Returns
169		-------
170		str
171		The Norphone code
172
173		Examples
174		--------
175		>>> norphone('Hansen')
176		'HNSN'
177		>>> norphone('Larsen')
178		'LRSN'
179		>>> norphone('Aagaard')
180		'ÅKRT'
181		>>> norphone('Braaten')
182		'BRTN'
183		>>> norphone('Sandvik')
184		'SNVK'
185
186		"""
187	1	return Norphone().encode(word)
188
189
190		if __name__ == '__main__':
191		import doctest
192
193		doctest.testmod()
194

chrislit / abydos

Push — master ( f43547...71985b )

abydos.phonetic._norphone.norphone() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like