abydos.phonetic._phonex.Phonex.encode_alpha() - Code Metrics - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

abydos.phonetic._phonex.Phonex.encode_alpha() A
last analyzed 2020-12-31 20:10 UTC

↳ Parent: abydos.phonetic._phonex

Complexity

Conditions

Size

Total Lines	31
Code Lines	3

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	5
CRAP Score	1

Importance

Changes

Metric	Value
eloc	3
dl	0
loc	31
ccs	5
cts	5
cp	1
rs	10
c	0
b	0
f	0
cc	1
nop	2
crap	1

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._phonex.

Phonex
"""

from unicodedata import normalize as unicode_normalize

from ._phonetic import _Phonetic

# Names exported by a star-import of this module: only the Phonex class.
__all__ = ['Phonex']


class Phonex(_Phonetic):
    """Phonex code.

    Phonex is an algorithm derived from Soundex, defined in :cite:`Lait:1996`.

    .. versionadded:: 0.3.6
    """

    # Translation table used by encode_alpha(): the digits 1-6 of the
    # numeric code become the letters P, S, T, L, N, R respectively.
    _alphabetic = str.maketrans('123456', 'PSTLNR')

    # Canonical leading letter for letters that are treated as phonetically
    # equivalent in first position. Members of ``_uc_vy_set`` are handled
    # separately in encode() and always become 'A'.
    _initial_equivalents = {
        'P': 'B',
        'V': 'F',
        'K': 'C',
        'Q': 'C',
        'J': 'G',
        'Z': 'S',
    }

    def __init__(self, max_length: int = 4, zero_pad: bool = True) -> None:
        """Initialize Phonex instance.

        Parameters
        ----------
        max_length : int
            The length of the code returned (defaults to 4). The value is
            clamped to the range [4, 64]; the special value -1 selects the
            maximum length of 64.
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string


        .. versionadded:: 0.4.0

        """
        # -1 is a sentinel for "as long as allowed"; everything else is
        # clamped to [4, 64].
        if max_length != -1:
            self._max_length = min(max(4, max_length), 64)
        else:
            self._max_length = 64
        self._zero_pad = zero_pad

    def encode_alpha(self, word: str) -> str:
        """Return the alphabetic Phonex code for a word.

        The numeric code from :meth:`encode` is stripped of trailing zeros,
        its first character is kept as is, and each remaining digit 1-6 is
        replaced by the corresponding letter of ``PSTLNR``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic Phonex value

        Examples
        --------
        >>> pe = Phonex()
        >>> pe.encode_alpha('Christopher')
        'CRST'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SSNT'


        .. versionadded:: 0.4.0

        """
        code = self.encode(word).rstrip('0')
        # The leading character is already a letter (or whatever the word
        # started with); only the trailing digits are translated.
        return code[:1] + code[1:].translate(self._alphabetic)

    def encode(self, word: str) -> str:
        """Return the Phonex code for a word.

        The word is upper-cased and NFKD-normalized, trailing ``S``
        characters are dropped, the beginning of the word is reduced to a
        canonical first letter, and the remaining characters are encoded
        with a modified Soundex digit scheme. The result is cut to the
        configured maximum length and, if zero-padding is enabled,
        right-padded with ``0``.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Phonex value

        Examples
        --------
        >>> pe = Phonex()
        >>> pe.encode('Christopher')
        'C623'
        >>> pe.encode('Niall')
        'N400'
        >>> pe.encode('Schmidt')
        'S253'
        >>> pe.encode('Smith')
        'S530'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        name = unicode_normalize('NFKD', word.upper())

        # Deletions are effected by replacing a letter with its neighbour,
        # which is then ignored by the duplicate handling of the Soundex
        # code. This avoids 'moving' all subsequent letters.

        # Remove any trailing Ss
        name = name.rstrip('S')

        # Phonetic equivalents of first 2 characters
        # Works since duplicate letters are ignored
        if name[:2] == 'KN':
            name = 'N' + name[2:]  # KN.. == N..
        elif name[:2] == 'PH':
            name = 'F' + name[2:]  # PH.. == F.. (H ignored anyway)
        elif name[:2] == 'WR':
            name = 'R' + name[2:]  # WR.. == R..

        # Special case, ignore H first letter (subsequent Hs are ignored
        # anyway, since H never receives a code below)
        if name[:1] == 'H':
            name = name[1:]

        name_code = ''
        if name:
            # Phonetic equivalents of first character
            first = name[0]
            if first in self._uc_vy_set:
                first = 'A'
            else:
                first = self._initial_equivalents.get(first, first)
            name = first + name[1:]
            name_code = first

        # Modified Soundex code. An index loop is required because the
        # encoding looks one character ahead and may rewrite that character.
        length = len(name)
        for i in range(1, length):
            char = name[i]
            following = name[i + 1 : i + 2]  # '' at the end of the name

            code = '0'
            if char in {'B', 'F', 'P', 'V'}:
                code = '1'
            elif char in {'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z'}:
                code = '2'
            elif char in {'D', 'T'}:
                # D/T directly before C is not coded
                if following != 'C':
                    code = '3'
            elif char == 'L':
                # L is only coded before a _uc_vy_set letter or at the end
                if following in self._uc_vy_set or i + 1 == length:
                    code = '4'
            elif char in {'M', 'N'}:
                # A D or G after M/N is overwritten with the M/N itself
                # (same length), so the next iteration sees a duplicate
                # and skips it.
                if following in {'D', 'G'}:
                    name = name[: i + 1] + char + name[i + 2 :]
                code = '5'
            elif char == 'R':
                # R is only coded before a _uc_vy_set letter or at the end
                if following in self._uc_vy_set or i + 1 == length:
                    code = '6'

            # Uncoded characters ('0') are dropped, and a code is never
            # emitted twice in a row. name_code is non-empty here because
            # the loop only runs when the name has a first character.
            if code != '0' and code != name_code[-1]:
                name_code += code

        if self._zero_pad:
            name_code += '0' * self._max_length
        if not name_code:
            name_code = '0'
        return name_code[: self._max_length]


if __name__ == '__main__':
    # When executed as a script, run the doctest examples found in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1		# Copyright 2014-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.phonetic._phonex.
18
19	1	Phonex
20		"""
21
22		from unicodedata import normalize as unicode_normalize
23
24	1	from ._phonetic import _Phonetic
25
26		__all__ = ['Phonex']
27
28
29		class Phonex(_Phonetic):
30		"""Phonex code.
31	1
32		Phonex is an algorithm derived from Soundex, defined in :cite:`Lait:1996`.
33	1
34		.. versionadded:: 0.3.6
35	1	"""
36	1
37		_alphabetic = dict(zip((ord(_) for _ in '123456'), 'PSTLNR'))
		0 ignored issues – show Comprehensibility Best Practice introduced 2019-02-15 07:24 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
38	1
39	1	def __init__(self, max_length: int = 4, zero_pad: bool = True) -> None:
40		"""Initialize Phonex instance.
41	1
42		Parameters
43		----------
44	1	max_length : int
45		The length of the code returned (defaults to 4)
46		zero_pad : bool
47		Pad the end of the return value with 0s to achieve a max_length
48		string
49
50
51		.. versionadded:: 0.4.0
52	1
53		"""
54	1	# Clamp max_length to [4, 64]
55		if max_length != -1:
56		self._max_length = min(max(4, max_length), 64)
57		else:
58		self._max_length = 64
59		self._zero_pad = zero_pad
60
61		def encode_alpha(self, word: str) -> str:
62		"""Return the alphabetic Phonex code for a word.
63
64		Parameters
65		----------
66		word : str
67		The word to transform
68
69		Returns
70	1	-------
71	1	str
72		The alphabetic Phonex value
73	1
74	1	Examples
75		--------
76	1	>>> pe = Phonex()
77		>>> pe.encode_alpha('Christopher')
78		'CRST'
79		>>> pe.encode_alpha('Niall')
80		'NL'
81		>>> pe.encode_alpha('Smith')
82		'SNT'
83		>>> pe.encode_alpha('Schmidt')
84		'SSNT'
85
86
87		.. versionadded:: 0.4.0
88
89		"""
90		code = self.encode(word).rstrip('0')
91		return code[:1] + code[1:].translate(self._alphabetic)
92
93		def encode(self, word: str) -> str:
94		"""Return the Phonex code for a word.
95
96		Parameters
97		----------
98		word : str
99		The word to transform
100
101		Returns
102		-------
103		str
104		The Phonex value
105	1
106	1	Examples
107		--------
108	1	>>> pe = Phonex()
109		>>> pe.encode('Christopher')
110		'C623'
111		>>> pe.encode('Niall')
112		'N400'
113		>>> pe.encode('Schmidt')
114		'S253'
115		>>> pe.encode('Smith')
116		'S530'
117
118
119		.. versionadded:: 0.1.0
120		.. versionchanged:: 0.3.6
121		Encapsulated in class
122
123		"""
124		name = unicode_normalize('NFKD', word.upper())
125
126		name_code = last = ''
127
128		# Deletions effected by replacing with next letter which
129		# will be ignored due to duplicate handling of Soundex code.
130		# This is faster than 'moving' all subsequent letters.
131
132		# Remove any trailing Ss
133		while name[-1:] == 'S':
134		name = name[:-1]
135
136		# Phonetic equivalents of first 2 characters
137		# Works since duplicate letters are ignored
138		if name[:2] == 'KN':
139	1	name = 'N' + name[2:] # KN.. == N..
140	1	elif name[:2] == 'PH':
141		name = 'F' + name[2:] # PH.. == F.. (H ignored anyway)
142	1	elif name[:2] == 'WR':
143		name = 'R' + name[2:] # WR.. == R..
144
145		if name:
146		# Special case, ignore H first letter (subsequent Hs ignored
147		# anyway)
148		# Works since duplicate letters are ignored
149	1	if name[0] == 'H':
150	1	name = name[1:]
151
152		if name:
153		# Phonetic equivalents of first character
154	1	if name[0] in self._uc_vy_set:
155	1	name = 'A' + name[1:]
156	1	elif name[0] in {'B', 'P'}:
157	1	name = 'B' + name[1:]
158	1	elif name[0] in {'V', 'F'}:
159	1	name = 'F' + name[1:]
160		elif name[0] in {'C', 'K', 'Q'}:
161	1	name = 'C' + name[1:]
162		elif name[0] in {'G', 'J'}:
163		name = 'G' + name[1:]
164		elif name[0] in {'S', 'Z'}:
165	1	name = 'S' + name[1:]
166	1
167		name_code = last = name[0]
168	1
169		# Modified Soundex code
170	1	for i in range(1, len(name)):
171	1	code = '0'
172	1	if name[i] in {'B', 'F', 'P', 'V'}:
173	1	code = '1'
174	1	elif name[i] in {'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z'}:
175	1	code = '2'
176	1	elif name[i] in {'D', 'T'}:
177	1	if name[i + 1 : i + 2] != 'C':
178	1	code = '3'
179	1	elif name[i] == 'L':
180	1	if name[i + 1 : i + 2] in self._uc_vy_set or i + 1 == len(
181	1	name
182		):
183	1	code = '4'
184		elif name[i] in {'M', 'N'}:
185		if name[i + 1 : i + 2] in {'D', 'G'}:
186	1	name = name[: i + 1] + name[i] + name[i + 2 :]
187	1	code = '5'
188	1	elif name[i] == 'R':
189	1	if name[i + 1 : i + 2] in self._uc_vy_set or i + 1 == len(
190	1	name
191	1	):
192	1	code = '6'
193	1
194	1	if code != last and code != '0' and i != 0:
195	1	name_code += code
196	1
197		last = name_code[-1]
198
199	1	if self._zero_pad:
200	1	name_code += '0' * self._max_length
201	1	if not name_code:
202	1	name_code = '0'
203	1	return name_code[: self._max_length]
204	1
205	1
206		if __name__ == '__main__':
207		import doctest
208	1
209		doctest.testmod()
210

chrislit / abydos

abydos.phonetic._phonex.Phonex.encode_alpha() A last analyzed 2020-12-31 20:10 UTC

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like

abydos.phonetic._phonex.Phonex.encode_alpha() A
last analyzed 2020-12-31 20:10 UTC