abydos.phonetic._phonex.phonex() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 03:32 UTC

abydos.phonetic._phonex.phonex() A

↳ Parent: abydos.phonetic._phonex

Complexity

Conditions

Size

Total Lines	26
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	3
dl	0
loc	26
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._phonex.

Phonex
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize as unicode_normalize

from six import text_type
from six.moves import range

from ._phonetic import Phonetic

__all__ = ['Phonex', 'phonex']


class Phonex(Phonetic):
    """Phonex phonetic encoder.

    Phonex is a Soundex-derived algorithm, defined in :cite:`Lait:1996`.
    """

    def encode(self, word, max_length=4, zero_pad=True):
        """Encode a word as its Phonex code.

        Args:
            word (str): The word to encode
            max_length (int): The maximum length of the returned code; -1
                selects 64 and any other value is clamped to the range
                [4, 64] (defaults to 4)
            zero_pad (bool): If True, right-pad the code with 0s so that it
                is exactly max_length characters long

        Returns:
            str: The Phonex code; '0' when nothing could be encoded and
                zero_pad is False

        Examples:
            >>> pe = Phonex()
            >>> pe.encode('Christopher')
            'C623'
            >>> pe.encode('Niall')
            'N400'
            >>> pe.encode('Schmidt')
            'S253'
            >>> pe.encode('Smith')
            'S530'

        """
        # Leading digraphs that are pronounced as a single letter.
        leading_digraphs = {'KN': 'N', 'PH': 'F', 'WR': 'R'}
        # Canonical representative for each class of initial consonants.
        initial_classes = {
            'B': 'B',
            'P': 'B',
            'V': 'F',
            'F': 'F',
            'C': 'C',
            'K': 'C',
            'Q': 'C',
            'G': 'G',
            'J': 'G',
            'S': 'S',
            'Z': 'S',
        }
        # Letters whose digit does not depend on the neighbouring letter.
        fixed_digits = {
            'B': '1',
            'F': '1',
            'P': '1',
            'V': '1',
            'C': '2',
            'G': '2',
            'J': '2',
            'K': '2',
            'Q': '2',
            'S': '2',
            'X': '2',
            'Z': '2',
        }

        letters = unicode_normalize('NFKD', text_type(word.upper()))
        letters = letters.replace('ß', 'SS')

        # -1 means "as long as allowed"; anything else is clamped to [4, 64].
        max_length = 64 if max_length == -1 else min(max(4, max_length), 64)

        # Trailing Ss are discarded before any encoding takes place.
        letters = letters.rstrip('S')

        replacement = leading_digraphs.get(letters[:2])
        if replacement is not None:
            letters = replacement + letters[2:]

        # A leading H is dropped (the main loop gives later Hs no digit).
        if letters[:1] == 'H':
            letters = letters[1:]

        pieces = []
        if letters:
            head = letters[0]
            # Members of self._uc_vy_set take precedence and all map to 'A'.
            if head in self._uc_vy_set:
                head = 'A'
            else:
                head = initial_classes.get(head, head)
            pieces.append(head)

            # A mutable copy lets the M/N rule overwrite the following
            # letter in place, so the next iteration sees the change.
            symbols = list(letters)
            total = len(symbols)
            for pos in range(1, total):
                current = symbols[pos]
                at_end = pos + 1 == total
                following = '' if at_end else symbols[pos + 1]

                digit = '0'
                if current in fixed_digits:
                    digit = fixed_digits[current]
                elif current in ('D', 'T'):
                    # D/T directly before C is not coded.
                    if following != 'C':
                        digit = '3'
                elif current in ('L', 'R'):
                    # L/R only count before a _uc_vy_set letter or at the
                    # very end of the word.
                    if following in self._uc_vy_set or at_end:
                        digit = '4' if current == 'L' else '6'
                elif current in ('M', 'N'):
                    # A D/G right after M/N is replaced by the M/N itself,
                    # so the duplicate check below suppresses it.
                    if following in ('D', 'G'):
                        symbols[pos + 1] = current
                    digit = '5'

                # Skip uncoded letters and repeats of the last emitted symbol.
                if digit != '0' and digit != pieces[-1]:
                    pieces.append(digit)

        name_code = ''.join(pieces)
        if zero_pad:
            return (name_code + '0' * max_length)[:max_length]
        return name_code[:max_length] or '0'


def phonex(word, max_length=4, zero_pad=True):
    """Encode a word as its Phonex code.

    Convenience wrapper that builds a :py:class:`Phonex` instance and
    delegates to :py:meth:`Phonex.encode`.

    Args:
        word (str): The word to encode
        max_length (int): The maximum length of the returned code, passed
            through to :py:meth:`Phonex.encode` (defaults to 4)
        zero_pad (bool): Whether to right-pad the code with 0s up to
            max_length, passed through to :py:meth:`Phonex.encode`

    Returns:
        str: The Phonex code

    Examples:
        >>> phonex('Christopher')
        'C623'
        >>> phonex('Niall')
        'N400'
        >>> phonex('Schmidt')
        'S253'
        >>> phonex('Smith')
        'S530'

    """
    encoder = Phonex()
    return encoder.encode(word, max_length, zero_pad)


if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._phonex.
20
21		Phonex
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize as unicode_normalize
32
33	1	from six import text_type
34	1	from six.moves import range
35
36	1	from ._phonetic import Phonetic
37
38	1	__all__ = ['Phonex', 'phonex']
39
40
41	1	class Phonex(Phonetic):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
42		"""Phonex code.
43
44		Phonex is an algorithm derived from Soundex, defined in :cite:`Lait:1996`.
45		"""
46
47	1	def encode(self, word, max_length=4, zero_pad=True):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'encode' method Loading history...
48		"""Return the Phonex code for a word.
49
50		Args:
51		word (str): The word to transform
52		max_length (int): The length of the code returned (defaults to 4)
53		zero_pad (bool): pad the end of the return value with 0s to achieve
54		a max_length string
55
56		Returns:
57		str: The Phonex value
58
59		Examples:
60		>>> pe = Phonex()
61		>>> pe.encode('Christopher')
62		'C623'
63		>>> pe.encode('Niall')
64		'N400'
65		>>> pe.encode('Schmidt')
66		'S253'
67		>>> pe.encode('Smith')
68		'S530'
69
70		"""
71	1	name = unicode_normalize('NFKD', text_type(word.upper()))
72	1	name = name.replace('ß', 'SS')
73
74		# Clamp max_length to [4, 64]
75	1	if max_length != -1:
76	1	max_length = min(max(4, max_length), 64)
77		else:
78	1	max_length = 64
79
80	1	name_code = last = ''
81
82		# Deletions effected by replacing with next letter which
83		# will be ignored due to duplicate handling of Soundex code.
84		# This is faster than 'moving' all subsequent letters.
85
86		# Remove any trailing Ss
87	1	while name[-1:] == 'S':
88	1	name = name[:-1]
89
90		# Phonetic equivalents of first 2 characters
91		# Works since duplicate letters are ignored
92	1	if name[:2] == 'KN':
93	1	name = 'N' + name[2:] # KN.. == N..
94	1	elif name[:2] == 'PH':
95	1	name = 'F' + name[2:] # PH.. == F.. (H ignored anyway)
96	1	elif name[:2] == 'WR':
97	1	name = 'R' + name[2:] # WR.. == R..
98
99	1	if name:
100		# Special case, ignore H first letter (subsequent Hs ignored
101		# anyway)
102		# Works since duplicate letters are ignored
103	1	if name[0] == 'H':
104	1	name = name[1:]
105
106	1	if name:
107		# Phonetic equivalents of first character
108	1	if name[0] in self._uc_vy_set:
109	1	name = 'A' + name[1:]
110	1	elif name[0] in {'B', 'P'}:
111	1	name = 'B' + name[1:]
112	1	elif name[0] in {'V', 'F'}:
113	1	name = 'F' + name[1:]
114	1	elif name[0] in {'C', 'K', 'Q'}:
115	1	name = 'C' + name[1:]
116	1	elif name[0] in {'G', 'J'}:
117	1	name = 'G' + name[1:]
118	1	elif name[0] in {'S', 'Z'}:
119	1	name = 'S' + name[1:]
120
121	1	name_code = last = name[0]
122
123		# Modified Soundex code
124	1	for i in range(1, len(name)):
125	1	code = '0'
126	1	if name[i] in {'B', 'F', 'P', 'V'}:
127	1	code = '1'
128	1	elif name[i] in {'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z'}:
129	1	code = '2'
130	1	elif name[i] in {'D', 'T'}:
131	1	if name[i + 1 : i + 2] != 'C':
132	1	code = '3'
133	1	elif name[i] == 'L':
134	1	if name[i + 1 : i + 2] in self._uc_vy_set or i + 1 == len(
135		name
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
136		):
137	1	code = '4'
138	1	elif name[i] in {'M', 'N'}:
139	1	if name[i + 1 : i + 2] in {'D', 'G'}:
140	1	name = name[: i + 1] + name[i] + name[i + 2 :]
141	1	code = '5'
142	1	elif name[i] == 'R':
143	1	if name[i + 1 : i + 2] in self._uc_vy_set or i + 1 == len(
144		name
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
145		):
146	1	code = '6'
147
148	1	if code != last and code != '0' and i != 0:
149	1	name_code += code
150
151	1	last = name_code[-1]
152
153	1	if zero_pad:
154	1	name_code += '0' * max_length
155	1	if not name_code:
156	1	name_code = '0'
157	1	return name_code[:max_length]
158
159
160	1	def phonex(word, max_length=4, zero_pad=True):
161		"""Return the Phonex code for a word.
162
163		This is a wrapper for :py:meth:`Phonex.encode`.
164
165		Args:
166		word (str): The word to transform
167		max_length (int): The length of the code returned (defaults to 4)
168		zero_pad (bool): pad the end of the return value with 0s to achieve
169		a max_length string
170
171		Returns:
172		str: The Phonex value
173
174		Examples:
175		>>> phonex('Christopher')
176		'C623'
177		>>> phonex('Niall')
178		'N400'
179		>>> phonex('Schmidt')
180		'S253'
181		>>> phonex('Smith')
182		'S530'
183
184		"""
185	1	return Phonex().encode(word, max_length, zero_pad)
186
187
188		if __name__ == '__main__':
189		import doctest
190
191		doctest.testmod()
192

chrislit / abydos

Pull Request — master (#141)

abydos.phonetic._phonex.phonex() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like