abydos.phonetic._phonix.Phonix.encode() - Code Metrics - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

abydos.phonetic._phonix.Phonix.encode() F
last analyzed 2020-12-31 20:10 UTC

↳ Parent: abydos.phonetic._phonix

Complexity

Conditions

Size

Total Lines	197
Code Lines	63

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	50
CRAP Score	23

Importance

Changes

Metric	Value
eloc	63
dl	0
loc	197
ccs	50
cts	50
cp	1
rs	0
c	0
b	0
f	0
cc	23
nop	2
crap	23

How to fix Long Method Complexity

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._phonix.

Phonix
"""

from typing import Any, Optional, Set, Tuple
from unicodedata import normalize as unicode_normalize

from ._phonetic import _Phonetic

__all__ = ['Phonix']


class Phonix(_Phonetic):
    """Phonix code.

    Phonix is a Soundex-like algorithm defined in :cite:`Gadd:1990`.

    This implementation is based on:
    - :cite:`Pfeifer:2000`
    - :cite:`Christen:2011`
    - :cite:`Kollar:2007`

    .. versionadded:: 0.3.6
    """

    # Code point of each uppercase letter -> the digit it is coded as.
    _trans = dict(
        zip(
            (ord(_) for _ in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
            '01230720022455012683070808',
        )
    )

    # Code point of each digit -> the letter used by encode_alpha().
    _alphabetic = dict(zip((ord(_) for _ in '012345678'), 'APKTLNRFS'))

    def __init__(self, max_length: int = 4, zero_pad: bool = True) -> None:
        """Initialize Phonix instance.

        Parameters
        ----------
        max_length : int
            The length of the code returned (defaults to 4). Values are
            clamped to the range [4, 64]; -1 selects the maximum of 64.
        zero_pad : bool
            Pad the end of the return value with 0s to achieve a max_length
            string


        .. versionadded:: 0.3.6

        """
        # Look the letter classes up once instead of once per table row.
        vowels = super()._uc_v_set
        self._uc_c_set = super()._uc_set - vowels
        consonants = self._uc_c_set

        # Each rule is (position, src, tar[, context sets...]). The position
        # indexes the replacer tuple built in encode():
        #   0 = start of word (optional 4th item: following characters)
        #   1 = end of word   (optional 4th item: preceding characters)
        #   2 = middle of word (optional 4th/5th items: preceding/following)
        #   3 = anywhere       (optional 4th/5th items: preceding/following)
        # Rules are applied in order, so the order (and the repeated rules)
        # must be preserved.
        self._substitutions = (
            (3, 'DG', 'G'),
            (3, 'CO', 'KO'),
            (3, 'CA', 'KA'),
            (3, 'CU', 'KU'),
            (3, 'CY', 'SI'),
            (3, 'CI', 'SI'),
            (3, 'CE', 'SE'),
            (0, 'CL', 'KL', vowels),
            (3, 'CK', 'K'),
            (1, 'GC', 'K'),
            (1, 'JC', 'K'),
            (0, 'CHR', 'KR', vowels),
            (0, 'CR', 'KR', vowels),
            (0, 'WR', 'R'),
            (3, 'NC', 'NK'),
            (3, 'CT', 'KT'),
            (3, 'PH', 'F'),
            (3, 'AA', 'AR'),
            (3, 'SCH', 'SH'),
            (3, 'BTL', 'TL'),
            (3, 'GHT', 'T'),
            (3, 'AUGH', 'ARF'),
            (2, 'LJ', 'LD', vowels, vowels),
            (3, 'LOUGH', 'LOW'),
            (0, 'Q', 'KW'),
            (0, 'KN', 'N'),
            (1, 'GN', 'N'),
            (3, 'GHN', 'N'),
            (1, 'GNE', 'N'),
            (3, 'GHNE', 'NE'),
            (1, 'GNES', 'NS'),
            (0, 'GN', 'N'),
            (2, 'GN', 'N', None, consonants),
            (1, 'GN', 'N'),
            (0, 'PS', 'S'),
            (0, 'PT', 'T'),
            (0, 'CZ', 'C'),
            (2, 'WZ', 'Z', vowels),
            (2, 'CZ', 'CH'),
            (3, 'LZ', 'LSH'),
            (3, 'RZ', 'RSH'),
            (2, 'Z', 'S', None, vowels),
            (3, 'ZZ', 'TS'),
            (2, 'Z', 'TS', consonants),
            (3, 'HROUG', 'REW'),
            (3, 'OUGH', 'OF'),
            (2, 'Q', 'KW', vowels, vowels),
            (2, 'J', 'Y', vowels, vowels),
            (0, 'YJ', 'Y', vowels),
            (0, 'GH', 'G'),
            (1, 'GH', 'E', vowels),
            (0, 'CY', 'S'),
            (3, 'NX', 'NKS'),
            (0, 'PF', 'F'),
            (1, 'DT', 'T'),
            (1, 'TL', 'TIL'),
            (1, 'DL', 'DIL'),
            (3, 'YTH', 'ITH'),
            (0, 'TJ', 'CH', vowels),
            (0, 'TSJ', 'CH', vowels),
            (0, 'TS', 'T', vowels),
            (3, 'TCH', 'CH'),
            (2, 'WSK', 'VSKIE', vowels),
            (1, 'WSK', 'VSKIE', vowels),
            (0, 'MN', 'N', vowels),
            (0, 'PN', 'N', vowels),
            (2, 'STL', 'SL', vowels),
            (1, 'STL', 'SL', vowels),
            (1, 'TNT', 'ENT'),
            (1, 'EAUX', 'OH'),
            (3, 'EXCI', 'ECS'),
            (3, 'X', 'ECS'),
            (1, 'NED', 'ND'),
            (3, 'JR', 'DR'),
            (1, 'EE', 'EA'),
            (3, 'ZS', 'S'),
            (2, 'R', 'AH', vowels, consonants),
            (1, 'R', 'AH', vowels),
            (2, 'HR', 'AH', vowels, consonants),
            (1, 'HR', 'AH', vowels),
            (1, 'HR', 'AH', vowels),
            (1, 'RE', 'AR'),
            (1, 'R', 'AH', vowels),
            (3, 'LLE', 'LE'),
            (1, 'LE', 'ILE', consonants),
            (1, 'LES', 'ILES', consonants),
            (1, 'E', ''),
            (1, 'ES', 'S'),
            (1, 'SS', 'AS', vowels),
            (1, 'MB', 'M', vowels),
            (3, 'MPTS', 'MPS'),
            (3, 'MPS', 'MS'),
            (3, 'MPT', 'MT'),
        )  # type: Tuple[Tuple[Any, ...], ...]

        # Clamp max_length to [4, 64]
        if max_length != -1:
            self._max_length = min(max(4, max_length), 64)
        else:
            self._max_length = 64

        self._zero_pad = zero_pad

    @staticmethod
    def _start_repl(
        word: str, src: str, tar: str, post: Optional[Set[str]] = None
    ) -> str:
        """Replace src with tar at the start of word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        post : set
            Following characters; when given, src must be immediately
            followed by one of them for the replacement to happen

        Returns
        -------
        str
            Modified string

        """
        if post:
            matched = any(word.startswith(src + char) for char in post)
        else:
            matched = word.startswith(src)
        return tar + word[len(src) :] if matched else word

    @staticmethod
    def _end_repl(
        word: str, src: str, tar: str, pre: Optional[Set[str]] = None
    ) -> str:
        """Replace src with tar at the end of word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        pre : set
            Preceding characters; when given, src must be immediately
            preceded by one of them for the replacement to happen

        Returns
        -------
        str
            Modified string

        """
        if pre:
            matched = any(word.endswith(char + src) for char in pre)
        else:
            matched = word.endswith(src)
        return word[: -len(src)] + tar if matched else word

    @staticmethod
    def _all_repl(
        word: str,
        src: str,
        tar: str,
        pre: Optional[Set[str]] = None,
        post: Optional[Set[str]] = None,
    ) -> str:
        """Replace src with tar anywhere in word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        pre : set
            Preceding characters
        post : set
            Following characters

        Returns
        -------
        str
            Modified string

        """
        if not (pre or post):
            return word.replace(src, tar)

        # A missing context set is treated as "no constraint on that side".
        for before in pre or {''}:
            for after in post or {''}:
                word = word.replace(before + src + after, before + tar + after)
        return word

    @staticmethod
    def _mid_repl(
        word: str,
        src: str,
        tar: str,
        pre: Optional[Set[str]] = None,
        post: Optional[Set[str]] = None,
    ) -> str:
        """Replace src with tar in the middle of word.

        Parameters
        ----------
        word : str
            The word to modify
        src : str
            Substring to match
        tar : str
            Substring to substitute
        pre : set
            Preceding characters
        post : set
            Following characters

        Returns
        -------
        str
            Modified string

        """
        # A word shorter than three characters has no interior. Returning
        # early also avoids duplicating a one-character word, which the
        # word[0] + ... + word[-1] reassembly below would otherwise do.
        if len(word) < 3:
            return word

        if pre and post:
            # Both context characters are required, so a match can never
            # touch either edge of the word.
            return Phonix._all_repl(word, src, tar, pre, post)
        if post:
            # No preceding context: shield the first character.
            return word[0] + Phonix._all_repl(word[1:], src, tar, pre, post)
        if pre:
            # No following context: shield the last character.
            return Phonix._all_repl(word[:-1], src, tar, pre, post) + word[-1]
        return word[0] + Phonix._all_repl(word[1:-1], src, tar) + word[-1]

    def encode_alpha(self, word: str) -> str:
        """Return the alphabetic Phonix code for a word.

        The numeric code from encode() is stripped of trailing zeros and
        every digit after the first character is mapped to a letter.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic Phonix value

        Examples
        --------
        >>> pe = Phonix()
        >>> pe.encode_alpha('Christopher')
        'KRST'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'


        .. versionadded:: 0.4.0

        """
        code = self.encode(word).rstrip('0')
        return code[:1] + code[1:].translate(self._alphabetic)

    def encode(self, word: str) -> str:
        """Return the Phonix code for a word.

        The word is upper-cased, NFKD-normalized and reduced to the letters
        A-Z, rewritten by the ordered substitution rules, and then coded as
        its first letter ('v' for an initial vowel or Y) followed by digits.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Phonix value

        Examples
        --------
        >>> pe = Phonix()
        >>> pe.encode('Christopher')
        'K683'
        >>> pe.encode('Niall')
        'N400'
        >>> pe.encode('Smith')
        'S530'
        >>> pe.encode('Schmidt')
        'S530'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Indexed by the position code stored first in each rule.
        repl_at = (
            self._start_repl,
            self._end_repl,
            self._mid_repl,
            self._all_repl,
        )

        sdx = ''

        word = unicode_normalize('NFKD', word.upper())
        word = ''.join(c for c in word if c in self._uc_set)
        if word:
            for position, *args in self._substitutions:
                replaced = repl_at[position](word, *args)
                # Ignore a rule that would erase the whole word (final-E
                # stripping on the one-letter word 'E'); word[0] below
                # needs at least one character.
                if replaced:
                    word = replaced
            if word[0] in self._uc_vy_set:
                sdx = 'v' + word[1:].translate(self._trans)
            else:
                sdx = word[0] + word[1:].translate(self._trans)
            sdx = self._delete_consecutive_repeats(sdx)
            sdx = sdx.replace('0', '')

        if self._zero_pad:
            sdx += '0' * self._max_length
        if not sdx:
            sdx = '0'
        return sdx[: self._max_length]


# Run this module's docstring examples when it is executed as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()


1		# Copyright 2014-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.phonetic._phonix.
18
19	1	Phonix
20		"""
21
22		from typing import Any, Optional, Set, Tuple
23		from unicodedata import normalize as unicode_normalize
24	1
25		from ._phonetic import _Phonetic
26
27		__all__ = ['Phonix']
28
29
30		class Phonix(_Phonetic):
31	1	"""Phonix code.
32
33	1	Phonix is a Soundex-like algorithm defined in :cite:`Gadd:1990`.
34
35	1	This implementation is based on:
36		- :cite:`Pfeifer:2000`
37	1	- :cite:`Christen:2011`
38	1	- :cite:`Kollar:2007`
39
40	1	.. versionadded:: 0.3.6
41		"""
42
43	1	_trans = dict(
44		zip(
45		(ord(_) for _ in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
		Issue (Comprehensibility / Best Practice, introduced 2018-10-24 06:00 UTC; 0 ignored issues): The variable `_` does not seem to be defined.
46		'01230720022455012683070808',
47		)
48		)
49
50		_alphabetic = dict(zip((ord(_) for _ in '012345678'), 'APKTLNRFS'))
51
52		def __init__(self, max_length: int = 4, zero_pad: bool = True) -> None:
53		"""Initialize Phonix instance.
54
55		Parameters
56	1	----------
57		max_length : int
58	1	The length of the code returned (defaults to 4)
59		zero_pad : bool
60	1	Pad the end of the return value with 0s to achieve a max_length
61		string
62
63
64		.. versionadded:: 0.3.6
65
66		"""
67	1	self._uc_c_set = (
68		super(Phonix, self)._uc_set - super(Phonix, self)._uc_v_set
69	1	)
70
71		self._substitutions = (
72		(3, 'DG', 'G'),
73		(3, 'CO', 'KO'),
74		(3, 'CA', 'KA'),
75		(3, 'CU', 'KU'),
76		(3, 'CY', 'SI'),
77		(3, 'CI', 'SI'),
78		(3, 'CE', 'SE'),
79		(0, 'CL', 'KL', super(Phonix, self)._uc_v_set),
80		(3, 'CK', 'K'),
81		(1, 'GC', 'K'),
82		(1, 'JC', 'K'),
83		(0, 'CHR', 'KR', super(Phonix, self)._uc_v_set),
84	1	(0, 'CR', 'KR', super(Phonix, self)._uc_v_set),
85		(0, 'WR', 'R'),
86		(3, 'NC', 'NK'),
87		(3, 'CT', 'KT'),
88	1	(3, 'PH', 'F'),
89		(3, 'AA', 'AR'),
90		(3, 'SCH', 'SH'),
91		(3, 'BTL', 'TL'),
92		(3, 'GHT', 'T'),
93		(3, 'AUGH', 'ARF'),
94		(
95		2,
96		'LJ',
97		'LD',
98		super(Phonix, self)._uc_v_set,
99		super(Phonix, self)._uc_v_set,
100		),
101		(3, 'LOUGH', 'LOW'),
102		(0, 'Q', 'KW'),
103		(0, 'KN', 'N'),
104		(1, 'GN', 'N'),
105		(3, 'GHN', 'N'),
106		(1, 'GNE', 'N'),
107		(3, 'GHNE', 'NE'),
108		(1, 'GNES', 'NS'),
109		(0, 'GN', 'N'),
110		(2, 'GN', 'N', None, self._uc_c_set),
111		(1, 'GN', 'N'),
112		(0, 'PS', 'S'),
113		(0, 'PT', 'T'),
114		(0, 'CZ', 'C'),
115		(2, 'WZ', 'Z', super(Phonix, self)._uc_v_set),
116		(2, 'CZ', 'CH'),
117		(3, 'LZ', 'LSH'),
118		(3, 'RZ', 'RSH'),
119		(2, 'Z', 'S', None, super(Phonix, self)._uc_v_set),
120		(3, 'ZZ', 'TS'),
121		(2, 'Z', 'TS', self._uc_c_set),
122		(3, 'HROUG', 'REW'),
123		(3, 'OUGH', 'OF'),
124		(
125		2,
126		'Q',
127		'KW',
128		super(Phonix, self)._uc_v_set,
129		super(Phonix, self)._uc_v_set,
130		),
131		(
132		2,
133		'J',
134		'Y',
135		super(Phonix, self)._uc_v_set,
136		super(Phonix, self)._uc_v_set,
137		),
138		(0, 'YJ', 'Y', super(Phonix, self)._uc_v_set),
139		(0, 'GH', 'G'),
140		(1, 'GH', 'E', super(Phonix, self)._uc_v_set),
141		(0, 'CY', 'S'),
142		(3, 'NX', 'NKS'),
143		(0, 'PF', 'F'),
144		(1, 'DT', 'T'),
145		(1, 'TL', 'TIL'),
146		(1, 'DL', 'DIL'),
147		(3, 'YTH', 'ITH'),
148		(0, 'TJ', 'CH', super(Phonix, self)._uc_v_set),
149		(0, 'TSJ', 'CH', super(Phonix, self)._uc_v_set),
150		(0, 'TS', 'T', super(Phonix, self)._uc_v_set),
151		(3, 'TCH', 'CH'),
152		(2, 'WSK', 'VSKIE', super(Phonix, self)._uc_v_set),
153		(1, 'WSK', 'VSKIE', super(Phonix, self)._uc_v_set),
154		(0, 'MN', 'N', super(Phonix, self)._uc_v_set),
155		(0, 'PN', 'N', super(Phonix, self)._uc_v_set),
156		(2, 'STL', 'SL', super(Phonix, self)._uc_v_set),
157		(1, 'STL', 'SL', super(Phonix, self)._uc_v_set),
158		(1, 'TNT', 'ENT'),
159		(1, 'EAUX', 'OH'),
160		(3, 'EXCI', 'ECS'),
161		(3, 'X', 'ECS'),
162		(1, 'NED', 'ND'),
163		(3, 'JR', 'DR'),
164		(1, 'EE', 'EA'),
165		(3, 'ZS', 'S'),
166		(2, 'R', 'AH', super(Phonix, self)._uc_v_set, self._uc_c_set),
167		(1, 'R', 'AH', super(Phonix, self)._uc_v_set),
168		(2, 'HR', 'AH', super(Phonix, self)._uc_v_set, self._uc_c_set),
169		(1, 'HR', 'AH', super(Phonix, self)._uc_v_set),
170		(1, 'HR', 'AH', super(Phonix, self)._uc_v_set),
171		(1, 'RE', 'AR'),
172		(1, 'R', 'AH', super(Phonix, self)._uc_v_set),
173		(3, 'LLE', 'LE'),
174		(1, 'LE', 'ILE', self._uc_c_set),
175		(1, 'LES', 'ILES', self._uc_c_set),
176		(1, 'E', ''),
177		(1, 'ES', 'S'),
178		(1, 'SS', 'AS', super(Phonix, self)._uc_v_set),
179		(1, 'MB', 'M', super(Phonix, self)._uc_v_set),
180		(3, 'MPTS', 'MPS'),
181		(3, 'MPS', 'MS'),
182		(3, 'MPT', 'MT'),
183		) # type: Tuple[Tuple[Any, ...], ...]
184
185		# Clamp max_length to [4, 64]
186		if max_length != -1:
187		self._max_length = min(max(4, max_length), 64)
188		else:
189		self._max_length = 64
190
191		self._zero_pad = zero_pad
192
193		def encode_alpha(self, word: str) -> str:
194		"""Return the alphabetic Phonix code for a word.
195
196		Parameters
197		----------
198		word : str
199		The word to transform
200
201		Returns
202		-------
203	1	str
204	1	The alphabetic Phonix value
205
206	1	Examples
207		--------
208	1	>>> pe = Phonix()
209		>>> pe.encode_alpha('Christopher')
210	1	'KRST'
211		>>> pe.encode_alpha('Niall')
212		'NL'
213		>>> pe.encode_alpha('Smith')
214		'SNT'
215		>>> pe.encode_alpha('Schmidt')
216		'SNT'
217
218
219		.. versionadded:: 0.4.0
220
221		"""
222		code = self.encode(word).rstrip('0')
223		return code[:1] + code[1:].translate(self._alphabetic)
224
225		def encode(self, word: str) -> str:
226		"""Return the Phonix code for a word.
227
228		Parameters
229		----------
230		word : str
231		The word to transform
232
233		Returns
234		-------
235		str
236		The Phonix value
237
238		Examples
239	1	--------
240	1	>>> pe = Phonix()
241		>>> pe.encode('Christopher')
242	1	'K683'
243		>>> pe.encode('Niall')
244		'N400'
245		>>> pe.encode('Smith')
246		'S530'
247		>>> pe.encode('Schmidt')
248		'S530'
249
250
251		.. versionadded:: 0.1.0
252		.. versionchanged:: 0.3.6
253		Encapsulated in class
254
255		"""
256
257		def _start_repl(
258		word: str, src: str, tar: str, post: Optional[Set[str]] = None
259		) -> str:
260		"""Replace src with tar at the start of word.
261
262		Parameters
263		----------
264		word : str
265		The word to modify
266		src : str
267		Substring to match
268		tar : str
269		Substring to substitute
270		post : set
271		Following characters
272
273		Returns
274	1	-------
275		str
276		Modified string
277
278		.. versionadded:: 0.1.0
279
280		"""
281		if post:
282		for i in post:
283		if word.startswith(src + i):
284		return tar + word[len(src) :]
285		elif word.startswith(src):
286		return tar + word[len(src) :]
287		return word
288
289		def _end_repl(
290		word: str, src: str, tar: str, pre: Optional[Set[str]] = None
291		) -> str:
292		"""Replace src with tar at the end of word.
293
294		Parameters
295		----------
296	1	word : str
297	1	The word to modify
298	1	src : str
299	1	Substring to match
300	1	tar : str
301	1	Substring to substitute
302	1	pre : set
303		Preceding characters
304	1
305		Returns
306		-------
307		str
308		Modified string
309
310		.. versionadded:: 0.1.0
311
312		"""
313		if pre:
314		for i in pre:
315		if word.endswith(i + src):
316		return word[: -len(src)] + tar
317		elif word.endswith(src):
318		return word[: -len(src)] + tar
319		return word
320
321		def _mid_repl(
322		word: str,
323		src: str,
324		tar: str,
325		pre: Optional[Set[str]] = None,
326	1	post: Optional[Set[str]] = None,
327	1	) -> str:
328	1	"""Replace src with tar in the middle of word.
329	1
330	1	Parameters
331	1	----------
332	1	word : str
333		The word to modify
334	1	src : str
335		Substring to match
336		tar : str
337		Substring to substitute
338		pre : set
339		Preceding characters
340		post : set
341		Following characters
342
343		Returns
344		-------
345		str
346		Modified string
347
348		.. versionadded:: 0.1.0
349
350		"""
351		if pre or post:
352		if not pre:
353		return word[0] + _all_repl(word[1:], src, tar, pre, post)
354		elif not post:
355		return _all_repl(word[:-1], src, tar, pre, post) + word[-1]
356		return _all_repl(word, src, tar, pre, post)
357		return (
358	1	word[0] + _all_repl(word[1:-1], src, tar, pre, post) + word[-1]
359	1	)
360	1
361	1	def _all_repl(
362	1	word: str,
363	1	src: str,
364	1	tar: str,
365		pre: Optional[Set[str]] = None,
366		post: Optional[Set[str]] = None,
367		) -> str:
368	1	"""Replace src with tar anywhere in word.
369
370		Parameters
371		----------
372		word : str
373		The word to modify
374		src : str
375		Substring to match
376		tar : str
377		Substring to substitute
378		pre : set
379		Preceding characters
380		post : set
381		Following characters
382
383		Returns
384		-------
385		str
386		Modified string
387
388		.. versionadded:: 0.1.0
389
390		"""
391		if pre or post:
392	1	post = post if post else {''}
393	1	pre = pre if pre else {''}
394	1
395		for i, j in ((i, j) for i in pre for j in post):
396	1	word = word.replace(i + src + j, i + tar + j)
397	1	return word
398	1	else:
399		return word.replace(src, tar)
400	1
401		repl_at = (_start_repl, _end_repl, _mid_repl, _all_repl)
402	1
403	1	sdx = ''
404	1
405		word = unicode_normalize('NFKD', word.upper())
406	1	word = ''.join(c for c in word if c in self._uc_set)
407		if word:
408	1	for trans in self._substitutions:
409		word = repl_at[trans[0]](word, *trans[1:])
410	1	if word[0] in self._uc_vy_set:
411		sdx = 'v' + word[1:].translate(self._trans)
412	1	else:
413	1	sdx = word[0] + word[1:].translate(self._trans)
414	1	sdx = self._delete_consecutive_repeats(sdx)
415	1	sdx = sdx.replace('0', '')
416	1
417	1	if self._zero_pad:
418	1	sdx += '0' * self._max_length
419	1	if not sdx:
420		sdx = '0'
421	1	return sdx[: self._max_length]
422	1
423	1
424		if __name__ == '__main__':
425	1	import doctest
426	1
427		doctest.testmod()
428

chrislit / abydos

abydos.phonetic._phonix.Phonix.encode() F last analyzed 2020-12-31 20:10 UTC

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

abydos.phonetic._phonix.Phonix.encode() F
last analyzed 2020-12-31 20:10 UTC