abydos.phonetic._sfinx_bis.SfinxBis.encode() - Code Metrics - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

abydos.phonetic._sfinx_bis.SfinxBis.encode() F
last analyzed 2020-12-31 20:10 UTC

↳ Parent: abydos.phonetic._sfinx_bis

Complexity

Conditions

Size

Total Lines	211
Code Lines	96

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	76
CRAP Score	32

Importance

Changes

Metric	Value
eloc	96
dl	0
loc	211
ccs	76
cts	76
cp	1
rs	0
c	0
b	0
f	0
cc	32
nop	2
crap	32

How to fix Long Method Complexity

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._sfinx_bis.

SfinxBis
"""

from unicodedata import normalize as unicode_normalize

from ._phonetic import _Phonetic

__all__ = ['SfinxBis']


class SfinxBis(_Phonetic):
    """SfinxBis code.

    SfinxBis is a Soundex-like algorithm defined in :cite:`Axelsson:2009`.

    This implementation follows the reference implementation:
    :cite:`Sjoo:2009`.

    SfinxBis is intended chiefly for Swedish names.

    .. versionadded:: 0.3.6
    """

    # Name particles (nobility prefixes and similar) stripped in step 2 of
    # encode(). Multi-word entries precede the single-word entries they
    # contain, so e.g. ' DE LA ' is removed as a unit before ' DE ' or
    # ' LA ' is tried.
    _adelstitler = (
        ' DE LA ',
        ' DE LAS ',
        ' DE LOS ',
        ' VAN DE ',
        ' VAN DEN ',
        ' VAN DER ',
        ' VON DEM ',
        ' VON DER ',
        ' AF ',
        ' AV ',
        ' DA ',
        ' DE ',
        ' DEL ',
        ' DEN ',
        ' DES ',
        ' DI ',
        ' DO ',
        ' DON ',
        ' DOS ',
        ' DU ',
        ' E ',
        ' IN ',
        ' LA ',
        ' LE ',
        ' MAC ',
        ' MC ',
        ' VAN ',
        ' VON ',
        ' Y ',
        ' S:T ',
    )

    # "Hard" and "soft" vowels; they select different initial-sound rules.
    _harde_vokaler = {'A', 'O', 'U', 'Å'}
    _mjuka_vokaler = {'E', 'I', 'Y', 'Ä', 'Ö'}
    # Uppercase consonants.
    _uc_c_set = set('BCDFGHJKLMNPQRSTVWXZ')
    # Every character that survives step 5 (A-Z plus Ä, Å, Ö).
    _uc_set = set('ABCDEFGHIJKLMNOPQRSTUVWXYZÄÅÖ')

    # Letter -> digit table for everything after the first sound. All
    # vowels map to '9', which is removed again in step 11.
    _trans = {
        ord(letter): digit
        for letter, digit in zip(
            'BCDFGHJKLMNPQRSTVZAOUÅEIYÄÖ', '123729224551268378999999999'
        )
    }

    # Single-character replacements applied while Swedish-izing a token.
    _substitutions = {
        ord(letter): replacement
        for letter, replacement in zip(
            'WZÀÁÂÃÆÇÈÉÊËÌÍÎÏÑÒÓÔÕØÙÚÛÜÝ', 'VSAAAAÄCEEEEIIIINOOOOÖUUUYY'
        )
    }

    # Digit (and '#') -> letter table used by encode_alpha().
    _alphabetic = {
        ord(symbol): letter
        for symbol, letter in zip('123456789#', 'PKTLNRFSAŠ')
    }

    def __init__(self, max_length: int = -1) -> None:
        """Initialize SfinxBis instance.

        Parameters
        ----------
        max_length : int
            The length of the code returned (defaults to unlimited); only
            values greater than 0 cause truncation


        .. versionadded:: 0.4.0

        """
        self._max_length = max_length

    def encode_alpha(self, word: str) -> str:
        """Return the alphabetic SfinxBis code for a word.

        The numeric code from :meth:`encode` is computed first and its
        digits (and ``#``) are then replaced by letters.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The alphabetic SfinxBis value

        Examples
        --------
        >>> pe = SfinxBis()
        >>> pe.encode_alpha('Christopher')
        'KRSTFR'
        >>> pe.encode_alpha('Niall')
        'NL'
        >>> pe.encode_alpha('Smith')
        'SNT'
        >>> pe.encode_alpha('Schmidt')
        'SNT'

        >>> pe.encode_alpha('Johansson')
        'JNSN'
        >>> pe.encode_alpha('Sjöberg')
        'ŠPRK'


        .. versionadded:: 0.4.0
        .. versionchanged:: 0.6.0
            Made return a str only (comma-separated)

        """
        return self.encode(word).translate(self._alphabetic)

    def _foersvensker(self, ordet: str) -> str:
        """Return the Swedish-ized form of a single upper-case token.

        Parameters
        ----------
        ordet : str
            Token to transform

        Returns
        -------
        str
            Transformed token

        """
        for foreign, swedish in (
            ('STIERN', 'STJÄRN'),
            ('HIE', 'HJ'),
            ('SIÖ', 'SJÖ'),
            ('SCH', 'SH'),
            ('QU', 'KV'),
            ('IO', 'JO'),
            ('PH', 'F'),
        ):
            ordet = ordet.replace(foreign, swedish)

        # A vowel followed by Ü, Y or I: the second letter becomes J.
        # The vowels are visited in sorted order, not in set order: Y and I
        # are themselves soft vowels, so these replacements can feed each
        # other, and set order for strings changes with hash randomization
        # (the code of e.g. 'MEYIJ' would otherwise vary between runs).
        for vokaler in (self._harde_vokaler, self._mjuka_vokaler):
            for vokal in sorted(vokaler):
                for second in 'ÜYI':
                    ordet = ordet.replace(vokal + second, vokal + 'J')

        # Drop an H that is directly followed by a consonant.
        if 'H' in ordet:
            for konsonant in sorted(self._uc_c_set):
                ordet = ordet.replace('H' + konsonant, konsonant)

        ordet = ordet.translate(self._substitutions)

        ordet = ordet.replace('Ð', 'ETH')
        ordet = ordet.replace('Þ', 'TH')

        return ordet

    def _koda_foersta_ljudet(self, ordet: str) -> str:
        """Return the token with its first sound coded.

        The rules are tried in order and only the first match is applied.
        An initial vowel becomes ``$``; the other rules rewrite the leading
        one to three letters to ``J``, ``K``, ``S`` or ``#``. A token that
        matches no rule (including the empty string) is returned unchanged.

        Parameters
        ----------
        ordet : str
            Token to transform

        Returns
        -------
        str
            Transformed token

        """
        mjuka = self._mjuka_vokaler
        harde = self._harde_vokaler
        # Slices, not indexing, so that short tokens never raise.
        first = ordet[0:1]
        second = ordet[1:2]
        third = ordet[2:3]
        first_two = ordet[0:2]

        if first in mjuka or first in harde:
            return '$' + ordet[1:]
        if first_two in ('DJ', 'GJ', 'HJ', 'LJ'):
            return 'J' + ordet[2:]
        if first == 'G' and second in mjuka:
            return 'J' + ordet[1:]
        if first == 'Q':
            return 'K' + ordet[1:]
        if first_two == 'CH' and (third in mjuka or third in harde):
            return '#' + ordet[2:]
        if first == 'C' and second in harde:
            return 'K' + ordet[1:]
        if first == 'C' and second in self._uc_c_set:
            return 'K' + ordet[1:]
        if first == 'X':
            return 'S' + ordet[1:]
        if first == 'C' and second in mjuka:
            return 'S' + ordet[1:]
        if ordet[0:3] in ('SKJ', 'STJ', 'SCH'):
            return '#' + ordet[3:]
        if first_two in ('SH', 'KJ', 'TJ', 'SJ'):
            return '#' + ordet[2:]
        if first_two == 'SK' and third in mjuka:
            return '#' + ordet[2:]
        if first == 'K' and second in mjuka:
            return '#' + ordet[1:]
        return ordet

    def _koda_resten(self, rest: str) -> str:
        """Return the digit code for everything after the first sound.

        Parameters
        ----------
        rest : str
            A token without its (already coded) first character

        Returns
        -------
        str
            The digit code of the remainder

        """
        # Step 8: phonetic transformation of the remainder
        rest = rest.replace('DT', 'T')
        rest = rest.replace('X', 'KS')

        # Step 9: code the remainder as digits; C before a soft vowel is
        # coded '8' instead of the '2' that the table gives C
        for vokal in sorted(self._mjuka_vokaler):
            rest = rest.replace('C' + vokal, '8' + vokal)
        rest = rest.translate(self._trans)

        # Step 10: remove adjacent duplicates (inherited helper)
        rest = self._delete_consecutive_repeats(rest)

        # Step 11: remove every '9' (the vowel code)
        return rest.replace('9', '')

    def encode(self, word: str) -> str:
        """Return the SfinxBis code for a word.

        The input is split into tokens on whitespace and hyphens after the
        name particles have been removed; each token is coded separately
        and the codes are joined with commas.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The SfinxBis value (an empty string if the input contains no
            tokens)

        Examples
        --------
        >>> pe = SfinxBis()
        >>> pe.encode('Christopher')
        'K68376'
        >>> pe.encode('Niall')
        'N4'
        >>> pe.encode('Smith')
        'S53'
        >>> pe.encode('Schmidt')
        'S53'

        >>> pe.encode('Johansson')
        'J585'
        >>> pe.encode('Sjöberg')
        '#162'


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class
        .. versionchanged:: 0.6.0
            Made return a str only (comma-separated)

        """
        # Step 1: upper-case (and treat hyphens as token separators)
        word = unicode_normalize('NFC', word.upper())
        word = word.replace('-', ' ')

        # Step 2: remove name particles, both between tokens and at the very
        # start of the input (where there is no leading space to match)
        for adelstitel in self._adelstitler:
            while adelstitel in word:
                word = word.replace(adelstitel, ' ')
            if word.startswith(adelstitel[1:]):
                word = word[len(adelstitel) - 1 :]

        # Split into tokens.
        # Step 3: remove doubled letters (the inherited helper is applied to
        # each whole token)
        ordlista = [
            self._delete_consecutive_repeats(ordet) for ordet in word.split()
        ]
        if not ordlista:
            return ''

        koder = []
        for ordet in ordlista:
            # Step 4: Swedish-ize the spelling
            ordet = self._foersvensker(ordet)

            # Step 5: remove every character that is not A-Ö
            # (65-90, 196, 197, 214); a token made up only of other
            # characters becomes an empty string and stays in the list
            ordet = ''.join(c for c in ordet if c in self._uc_set)

            # Step 6: code the first sound
            ordet = self._koda_foersta_ljudet(ordet)

            # Steps 7-12: split off the first character, code the remainder
            # and put the two parts back together
            koder.append(ordet[0:1] + self._koda_resten(ordet[1:]))

        # truncate, if max_length is set
        if self._max_length > 0:
            koder = [kod[: self._max_length] for kod in koder]

        return ','.join(koder)


if __name__ == '__main__':
    # When this module is executed as a script, run the doctest examples
    # embedded in its docstrings.
    import doctest

    doctest.testmod()


1		# Copyright 2014-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.phonetic._sfinx_bis.
18
19	1	SfinxBis
20		"""
21
22		from unicodedata import normalize as unicode_normalize
23
24	1	from ._phonetic import _Phonetic
25
26		__all__ = ['SfinxBis']
27
28
29		class SfinxBis(_Phonetic):
30		"""SfinxBis code.
31	1
32		SfinxBis is a Soundex-like algorithm defined in :cite:`Axelsson:2009`.
33	1
34		This implementation follows the reference implementation:
35	1	:cite:`Sjoo:2009`.
36
37	1	SfinxBis is intended chiefly for Swedish names.
38	1
39		.. versionadded:: 0.3.6
40	1	"""
41
42		_adelstitler = (
43	1	' DE LA ',
44		' DE LAS ',
45		' DE LOS ',
46		' VAN DE ',
47		' VAN DEN ',
48		' VAN DER ',
49		' VON DEM ',
50		' VON DER ',
51		' AF ',
52		' AV ',
53		' DA ',
54		' DE ',
55		' DEL ',
56	1	' DEN ',
57		' DES ',
58		' DI ',
59		' DO ',
60		' DON ',
61		' DOS ',
62		' DU ',
63		' E ',
64		' IN ',
65		' LA ',
66		' LE ',
67		' MAC ',
68		' MC ',
69		' VAN ',
70		' VON ',
71		' Y ',
72		' S:T ',
73		)
74
75		_harde_vokaler = {'A', 'O', 'U', 'Å'}
76		_mjuka_vokaler = {'E', 'I', 'Y', 'Ä', 'Ö'}
77		_uc_c_set = {
78		'B',
79		'C',
80		'D',
81		'F',
82		'G',
83		'H',
84		'J',
85		'K',
86		'L',
87		'M',
88		'N',
89	1	'P',
90	1	'Q',
91	1	'R',
92		'S',
93		'T',
94		'V',
95		'W',
96		'X',
97		'Z',
98		}
99		_uc_set = {
100		'A',
101		'B',
102		'C',
103		'D',
104		'E',
105		'F',
106		'G',
107		'H',
108		'I',
109		'J',
110		'K',
111		'L',
112		'M',
113	1	'N',
114		'O',
115		'P',
116		'Q',
117		'R',
118		'S',
119		'T',
120		'U',
121		'V',
122		'W',
123		'X',
124		'Y',
125		'Z',
126		'Ä',
127		'Å',
128		'Ö',
129		}
130
131		_trans = dict(
132		zip(
133		(ord(_) for _ in 'BCDFGHJKLMNPQRSTVZAOUÅEIYÄÖ'),
		Issue (Comprehensibility, Best Practice; introduced 2018-10-24 06:00 UTC): The variable `_` does not seem to be defined.
134		'123729224551268378999999999',
135		)
136		)
137
138		_substitutions = dict(
139		zip(
140		(ord(_) for _ in 'WZÀÁÂÃÆÇÈÉÊËÌÍÎÏÑÒÓÔÕØÙÚÛÜÝ'),
141		'VSAAAAÄCEEEEIIIINOOOOÖUUUYY',
142		)
143		)
144
145	1	_alphabetic = dict(zip((ord(_) for _ in '123456789#'), 'PKTLNRFSAŠ'))
146
147		def __init__(self, max_length: int = -1) -> None:
148		"""Initialize SfinxBis instance.
149
150		Parameters
151		----------
152	1	max_length : int
153		The length of the code returned (defaults to unlimited)
154
155
156		.. versionadded:: 0.4.0
157
158		"""
159	1	self._max_length = max_length
160
161	1	def encode_alpha(self, word: str) -> str:
162		"""Return the alphabetic SfinxBis code for a word.
163
164		Parameters
165		----------
166		word : str
167		The word to transform
168
169		Returns
170		-------
171		str
172		The alphabetic SfinxBis value
173	1
174		Examples
175	1	--------
176		>>> pe = SfinxBis()
177		>>> pe.encode_alpha('Christopher')
178		'KRSTFR'
179		>>> pe.encode_alpha('Niall')
180		'NL'
181		>>> pe.encode_alpha('Smith')
182		'SNT'
183		>>> pe.encode_alpha('Schmidt')
184		'SNT'
185
186		>>> pe.encode_alpha('Johansson')
187		'JNSN'
188		>>> pe.encode_alpha('Sjöberg')
189		'ŠPRK'
190
191
192		.. versionadded:: 0.4.0
193		.. versionchanged:: 0.6.0
194		Made return a str only (comma-separated)
195
196		"""
197		return self.encode(word).translate(self._alphabetic)
198
199		def encode(self, word: str) -> str:
200		"""Return the SfinxBis code for a word.
201
202		Parameters
203		----------
204		word : str
205		The word to transform
206
207		Returns
208		-------
209	1	str
210		The SfinxBis value
211
212		Examples
213	1	--------
214		>>> pe = SfinxBis()
215		>>> pe.encode('Christopher')
216		'K68376'
217		>>> pe.encode('Niall')
218		'N4'
219		>>> pe.encode('Smith')
220		'S53'
221		>>> pe.encode('Schmidt')
222		'S53'
223
224		>>> pe.encode('Johansson')
225		'J585'
226		>>> pe.encode('Sjöberg')
227		'#162'
228
229
230		.. versionadded:: 0.1.0
231		.. versionchanged:: 0.3.6
232		Encapsulated in class
233		.. versionchanged:: 0.6.0
234		Made return a str only (comma-separated)
235
236		"""
237
238		def _foersvensker(lokal_ordet: str) -> str:
239		"""Return the Swedish-ized form of the word.
240
241		Parameters
242		----------
243		lokal_ordet : str
244		Word to transform
245
246		Returns
247		-------
248		str
249		Transformed word
250	1
251		.. versionadded:: 0.1.0
252
253		"""
254		lokal_ordet = lokal_ordet.replace('STIERN', 'STJÄRN')
255		lokal_ordet = lokal_ordet.replace('HIE', 'HJ')
256		lokal_ordet = lokal_ordet.replace('SIÖ', 'SJÖ')
257		lokal_ordet = lokal_ordet.replace('SCH', 'SH')
258		lokal_ordet = lokal_ordet.replace('QU', 'KV')
259		lokal_ordet = lokal_ordet.replace('IO', 'JO')
260		lokal_ordet = lokal_ordet.replace('PH', 'F')
261
262		for i in self._harde_vokaler:
263		lokal_ordet = lokal_ordet.replace(i + 'Ü', i + 'J')
264		lokal_ordet = lokal_ordet.replace(i + 'Y', i + 'J')
265		lokal_ordet = lokal_ordet.replace(i + 'I', i + 'J')
266	1	for i in self._mjuka_vokaler:
267	1	lokal_ordet = lokal_ordet.replace(i + 'Ü', i + 'J')
268	1	lokal_ordet = lokal_ordet.replace(i + 'Y', i + 'J')
269	1	lokal_ordet = lokal_ordet.replace(i + 'I', i + 'J')
270	1
271	1	if 'H' in lokal_ordet:
272	1	for i in self._uc_c_set:
273		lokal_ordet = lokal_ordet.replace('H' + i, i)
274	1
275	1	lokal_ordet = lokal_ordet.translate(self._substitutions)
276	1
277	1	lokal_ordet = lokal_ordet.replace('Ð', 'ETH')
278	1	lokal_ordet = lokal_ordet.replace('Þ', 'TH')
279	1
280	1	return lokal_ordet
281	1
282		def _koda_foersta_ljudet(lokal_ordet: str) -> str:
283	1	"""Return the word with the first sound coded.
284	1
285	1	Parameters
286		----------
287	1	lokal_ordet : str
288		Word to transform
289	1
290	1	Returns
291	1	-------
292		str
293	1	Transformed word
294
295	1	.. versionadded:: 0.1.0
296
297		"""
298		if (
299		lokal_ordet[0:1] in self._mjuka_vokaler
300		or lokal_ordet[0:1] in self._harde_vokaler
301		):
302		lokal_ordet = '$' + lokal_ordet[1:]
303		elif lokal_ordet[0:2] in ('DJ', 'GJ', 'HJ', 'LJ'):
304		lokal_ordet = 'J' + lokal_ordet[2:]
305		elif (
306		lokal_ordet[0:1] == 'G'
307		and lokal_ordet[1:2] in self._mjuka_vokaler
308		):
309		lokal_ordet = 'J' + lokal_ordet[1:]
310		elif lokal_ordet[0:1] == 'Q':
311	1	lokal_ordet = 'K' + lokal_ordet[1:]
312		elif lokal_ordet[0:2] == 'CH' and lokal_ordet[2:3] in frozenset(
313		self._mjuka_vokaler | self._harde_vokaler
314		):
315	1	lokal_ordet = '#' + lokal_ordet[2:]
316	1	elif (
317	1	lokal_ordet[0:1] == 'C'
318	1	and lokal_ordet[1:2] in self._harde_vokaler
319		):
320		lokal_ordet = 'K' + lokal_ordet[1:]
321		elif (
322	1	lokal_ordet[0:1] == 'C' and lokal_ordet[1:2] in self._uc_c_set
323	1	):
324	1	lokal_ordet = 'K' + lokal_ordet[1:]
325	1	elif lokal_ordet[0:1] == 'X':
326		lokal_ordet = 'S' + lokal_ordet[1:]
327		elif (
328	1	lokal_ordet[0:1] == 'C'
329	1	and lokal_ordet[1:2] in self._mjuka_vokaler
330		):
331		lokal_ordet = 'S' + lokal_ordet[1:]
332		elif lokal_ordet[0:3] in ('SKJ', 'STJ', 'SCH'):
333	1	lokal_ordet = '#' + lokal_ordet[3:]
334	1	elif lokal_ordet[0:2] in ('SH', 'KJ', 'TJ', 'SJ'):
335		lokal_ordet = '#' + lokal_ordet[2:]
336		elif (
337	1	lokal_ordet[0:2] == 'SK'
338	1	and lokal_ordet[2:3] in self._mjuka_vokaler
339	1	):
340	1	lokal_ordet = '#' + lokal_ordet[2:]
341		elif (
342		lokal_ordet[0:1] == 'K'
343		and lokal_ordet[1:2] in self._mjuka_vokaler
344	1	):
345	1	lokal_ordet = '#' + lokal_ordet[1:]
346	1	return lokal_ordet
347	1
348	1	# Steg 1, Versaler
349	1	word = unicode_normalize('NFC', word.upper())
350		word = word.replace('-', ' ')
351
352		# Steg 2, Ta bort adelsprefix
353	1	for adelstitel in self._adelstitler:
354	1	while adelstitel in word:
355		word = word.replace(adelstitel, ' ')
356		if word.startswith(adelstitel[1:]):
357		word = word[len(adelstitel) - 1 :]
358	1
359	1	# Split word into tokens
360		ordlista = word.split()
361
362	1	# Steg 3, Ta bort dubbelteckning i början på namnet
363	1	ordlista = [
364	1	self._delete_consecutive_repeats(ordet) for ordet in ordlista
365		]
366		if not ordlista:
367	1	# noinspection PyRedundantParentheses
368	1	return ''
369	1
370	1	# Steg 4, Försvenskning
371	1	ordlista = [_foersvensker(ordet) for ordet in ordlista]
372
373		# Steg 5, Ta bort alla tecken som inte är A-Ö (65-90,196,197,214)
374	1	ordlista = [
375		''.join(c for c in ordet if c in self._uc_set)
376		for ordet in ordlista
377	1	]
378
379		# Steg 6, Koda första ljudet
380	1	ordlista = [_koda_foersta_ljudet(ordet) for ordet in ordlista]
381
382	1	# Steg 7, Dela upp namnet i två delar
383		rest = [ordet[1:] for ordet in ordlista]
384
385	1	# Steg 8, Utför fonetisk transformation i resten
386		rest = [ordet.replace('DT', 'T') for ordet in rest]
387		rest = [ordet.replace('X', 'KS') for ordet in rest]
388	1
389		# Steg 9, Koda resten till en sifferkod
390		for vokal in self._mjuka_vokaler:
391		rest = [ordet.replace('C' + vokal, '8' + vokal) for ordet in rest]
392		rest = [ordet.translate(self._trans) for ordet in rest]
393
394	1	# Steg 10, Ta bort intilliggande dubbletter
395		rest = [self._delete_consecutive_repeats(ordet) for ordet in rest]
396
397	1	# Steg 11, Ta bort alla "9"
398		rest = [ordet.replace('9', '') for ordet in rest]
399
400	1	# Steg 12, Sätt ihop delarna igen
401	1	ordlista = [
402		''.join(ordet) for ordet in zip((_[0:1] for _ in ordlista), rest)
		Issue (Comprehensibility, Best Practice; introduced 2018-11-04 08:02 UTC): The variable `_` does not seem to be defined.
403		]
404	1
405	1	# truncate, if max_length is set
406	1	if self._max_length > 0:
407		ordlista = [ordet[: self._max_length] for ordet in ordlista]
408
409	1	return ','.join(ordlista)
410
411
412	1	if __name__ == '__main__':
413		import doctest
414
415		doctest.testmod()
416

chrislit / abydos

abydos.phonetic._sfinx_bis.SfinxBis.encode() F last analyzed 2020-12-31 20:10 UTC

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

abydos.phonetic._sfinx_bis.SfinxBis.encode() F
last analyzed 2020-12-31 20:10 UTC