abydos.phonetic._phonic.PHONIC.encode_alpha() - Code Metrics - Inspection of "0.4.1" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#225)

by Chris

created 2019-07-12 00:08 UTC

abydos.phonetic._phonic.PHONIC.encode_alpha() A

↳ Parent: abydos.phonetic._phonic

Complexity

Conditions

Size

Total Lines	37
Code Lines	9

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	9
CRAP Score	1

Importance

Changes

Metric	Value
eloc	9
dl	0
loc	37
ccs	9
cts	9
cp	1
rs	9.95
c	0
b	0
f	0
cc	1
nop	2
crap	1

# -*- coding: utf-8 -*-

# Copyright 2019 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._phonic.

Phonic
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._phonetic import _Phonetic

__all__ = ['PHONIC']


class PHONIC(_Phonetic):
    """PHONIC code.

    PHONIC is a Soundex-like algorithm defined in :cite:`Taft:1970`.

    A word is reduced to a string of digits (one per coded letter or letter
    pair), consecutive duplicate digits are collapsed, and the result is
    optionally prefixed with the word's first letter and padded with zeros.


    .. versionadded:: 0.4.1
    """

    # Two-letter sequences that are coded as a unit. These are tried before
    # the single-letter table, and a match consumes both letters.
    _trans2 = {
        'CH': '6',
        'SH': '6',
        'PH': '8',
        'CE': '0',
        'CI': '0',
        'CY': '0',
    }

    # Single-letter codes. Any character that is neither in this table nor
    # the start of a _trans2 pair produces no digit in the final code.
    _trans1 = {
        'D': '1',
        'T': '1',
        'N': '2',
        'M': '3',
        'R': '4',
        'L': '5',
        'J': '6',
        'C': '7',
        'K': '7',
        'G': '7',
        'Q': '7',
        'X': '7',
        'F': '8',
        'V': '8',
        'B': '9',
        'P': '9',
        'S': '0',
        'Z': '0',
    }

    # Translation table (digit code point -> letter) used by encode_alpha to
    # turn a digit code into its alphabetic form.
    _alphabetic = dict(
        zip((ord(digit) for digit in '0123456789'), 'STNMRLJKFP')
    )

    def __init__(self, max_length=5, zero_pad=True, extended=False):
        """Initialize PHONIC instance.

        Parameters
        ----------
        max_length : int
            The maximum length of the code returned (defaults to 5). Values
            are clamped to the range 5..64; the special value -1 selects the
            maximum of 64.
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string
        extended : bool
            If True, this uses Taft's 'Extended PHONIC coding' mode, which
            simply omits the first character of the code.


        .. versionadded:: 0.4.1

        """
        # Require a max_length of at least 5 and not more than 64
        if max_length != -1:
            self._max_length = min(max(5, max_length), 64)
        else:
            self._max_length = 64

        self._zero_pad = zero_pad
        self._extended = extended

    def _digits(self, word):
        """Return the unpadded, untruncated digit string for a word.

        Parameters
        ----------
        word : str
            The word to transform; it must already be uppercased, since the
            lookup tables only contain uppercase keys

        Returns
        -------
        str
            The digit codes of the word, with consecutive repeats collapsed

        """
        code = []
        pos = 0
        while pos < len(word):
            pair = word[pos : pos + 2]
            if pair in self._trans2:
                code.append(self._trans2[pair])
                pos += 2
            else:
                # '.' marks an uncoded character. It is kept until after the
                # repeat-collapsing step so that equal digits separated by an
                # uncoded character are not merged.
                code.append(self._trans1.get(word[pos], '.'))
                pos += 1

        # _delete_consecutive_repeats is not defined in this class
        # (presumably inherited from _Phonetic).
        code = self._delete_consecutive_repeats(''.join(code))
        return code.replace('.', '')

    def encode_alpha(self, word):
        """Return the alphabetic PHONIC code for a word.

        The code is always computed without zero-padding and without the
        leading letter, regardless of the instance's zero_pad and extended
        settings, and the instance's settings are left untouched.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic PHONIC value

        Examples
        --------
        >>> pe = PHONIC()
        >>> pe.encode_alpha('Christopher')
        'JRSTF'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SMT'
        >>> pe.encode_alpha('Schmidt')
        'SJMT'


        .. versionadded:: 0.4.1

        """
        # Compute the digits directly instead of temporarily flipping
        # self._zero_pad / self._extended around a call to encode(): that
        # approach left the instance misconfigured if encoding raised.
        code = self._digits(word.upper())[: self._max_length]
        return code.translate(self._alphabetic)

    def encode(self, word):
        """Return the PHONIC code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The PHONIC code

        Examples
        --------
        >>> pe = PHONIC()
        >>> pe.encode('Christopher')
        'C6401'
        >>> pe.encode('Niall')
        'N2500'
        >>> pe.encode('Smith')
        'S0310'
        >>> pe.encode('Schmidt')
        'S0631'


        .. versionadded:: 0.4.1

        """
        # uppercase
        word = word.upper()

        code = self._digits(word)

        if self._zero_pad:
            # Pad to max_length - 1 digits, leaving one position for the
            # leading letter. In extended mode no letter is added, so a
            # padded code is one character shorter than max_length.
            code += '0' * (self._max_length - 1 - len(code))

        if not self._extended:
            code = word[:1] + code

        return code[: self._max_length]


if __name__ == '__main__':
    # When executed as a script, run the examples embedded in the docstrings.
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
2
3		# Copyright 2019 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic._phonic.
20
21		Phonic
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._phonetic import _Phonetic
32
33	1	__all__ = ['PHONIC']
34
35
36	1	class PHONIC(_Phonetic):
37		"""PHONIC code.
38
39		PHONIC is a Soundex-like algorithm defined in :cite:`Taft:1970`.
40
41
42		.. versionadded:: 0.4.1
43		"""
44
45	1	_trans2 = {
46		'CH': '6',
47		'SH': '6',
48		'PH': '8',
49		'CE': '0',
50		'CI': '0',
51		'CY': '0',
52		}
53
54	1	_trans1 = {
55		'D': '1',
56		'T': '1',
57		'N': '2',
58		'M': '3',
59		'R': '4',
60		'L': '5',
61		'J': '6',
62		'C': '7',
63		'K': '7',
64		'G': '7',
65		'Q': '7',
66		'X': '7',
67		'F': '8',
68		'V': '8',
69		'B': '9',
70		'P': '9',
71		'S': '0',
72		'Z': '0',
73		}
74
75	1	_alphabetic = dict(zip((ord(_) for _ in '0123456789'), 'STNMRLJKFP'))
		0 ignored issues – show Comprehensibility Best Practice introduced 2019-07-12 00:17 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
76
77	1	def __init__(self, max_length=5, zero_pad=True, extended=False):
78		"""Initialize PHONIC instance.
79
80		Parameters
81		----------
82		max_length : int
83		The length of the code returned (defaults to 5)
84		zero_pad : bool
85		Pad the end of the return value with 0s to achieve a max_length
86		string
87		extended : bool
88		If True, this uses Taft's 'Extended PHONIC coding' mode, which
89		simply omits the first character of the code.
90
91
92		.. versionadded:: 0.4.1
93
94		"""
95		# Require a max_length of at least 5 and not more than 64
96	1	if max_length != -1:
97	1	self._max_length = min(max(5, max_length), 64)
98		else:
99	1	self._max_length = 64
100
101	1	self._zero_pad = zero_pad
102	1	self._extended = extended
103
104	1	def encode_alpha(self, word):
105		"""Return the alphabetic PHONIC code for a word.
106
107		Parameters
108		----------
109		word : str
110		The word to transform
111
112		Returns
113		-------
114		str
115		The alphabetic PHONIC value
116
117		Examples
118		--------
119		>>> pe = PHONIC()
120		>>> pe.encode_alpha('Christopher')
121		'JRSTF'
122		>>> pe.encode_alpha('Niall')
123		'NL'
124		>>> pe.encode_alpha('Smith')
125		'SMT'
126		>>> pe.encode_alpha('Schmidt')
127		'SJMT'
128
129
130		.. versionadded:: 0.4.1
131
132		"""
133	1	save_pad = self._zero_pad
134	1	save_ext = self._extended
135	1	self._zero_pad = False
136	1	self._extended = True
137	1	code = self.encode(word)
138	1	self._zero_pad = save_pad
139	1	self._extended = save_ext
140	1	return code.translate(self._alphabetic)
141
142	1	def encode(self, word):
143		"""Return the PHONIC code for a word.
144
145		Parameters
146		----------
147		word : str
148		The word to transform
149
150		Returns
151		-------
152		str
153		The PHONIC code
154
155		Examples
156		--------
157		>>> pe = PHONIC()
158		>>> pe.encode('Christopher')
159		'C6401'
160		>>> pe.encode('Niall')
161		'N2500'
162		>>> pe.encode('Smith')
163		'S0310'
164		>>> pe.encode('Schmidt')
165		'S0631'
166
167
168		.. versionadded:: 0.4.1
169
170		"""
171		# uppercase
172	1	word = word.upper()
173
174	1	code = []
175	1	pos = 0
176	1	while pos < len(word):
177	1	if word[pos : pos + 2] in self._trans2:
178	1	code.append(self._trans2[word[pos : pos + 2]])
179	1	pos += 1
180	1	elif word[pos] in self._trans1:
181	1	code.append(self._trans1[word[pos]])
182		else:
183	1	code.append('.')
184	1	pos += 1
185
186	1	code = ''.join(code)
187	1	code = self._delete_consecutive_repeats(code)
188	1	code = code.replace('.', '')
189
190	1	if self._zero_pad:
191	1	code += '0' * (self._max_length - 1 - len(code))
192
193	1	if not self._extended:
194	1	code = word[:1] + code
195
196	1	return code[: self._max_length]
197
198
199		if __name__ == '__main__':
200		import doctest
201
202		doctest.testmod()
203

chrislit / abydos

Pull Request — master (#225)

abydos.phonetic._phonic.PHONIC.encode_alpha() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like