Issues in _phonem.py (master) - Issues in master - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Issues (140)

abydos/phonetic/_phonem.py (1 issue)

Labels

Severity

Minor 1

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic._phonem.

Phonem
"""

from unicodedata import normalize as unicode_normalize

from ._phonetic import _Phonetic

__all__ = ['Phonem']


class Phonem(_Phonetic):
    """Phonem.

    Phonem is defined in :cite:`Wilde:1988`.

    This version is based on the Perl implementation documented at
    :cite:`Wilz:2005`.
    It includes some enhancements presented in the Java port at
    :cite:`dcm4che:2011`.

    Phonem is intended chiefly for German names/words.

    .. versionadded:: 0.3.6
    """

    # Ordered multi-character rewrites. They are applied one after another
    # to the upper-cased word, before the single-character translation, so
    # the order of the pairs matters. '§' is an intermediate symbol that
    # ``_trans`` subsequently maps to 'U'.
    _substitutions = (
        ('SC', 'C'),
        ('SZ', 'C'),
        ('CZ', 'C'),
        ('TZ', 'C'),
        ('TS', 'C'),
        ('KS', 'X'),
        ('PF', 'V'),
        ('QU', 'KW'),
        ('PH', 'V'),
        ('UE', 'Y'),
        ('AE', 'E'),
        ('OE', 'Ö'),
        ('EI', 'AY'),
        ('EY', 'AY'),
        ('EU', 'OY'),
        ('AU', 'A§'),
        ('OU', '§'),
    )

    # Translation table for ``str.translate``: code point of a source
    # character -> replacement letter. The two strings are paired position
    # by position and must therefore stay the same length.
    _trans = {
        ord(source): target
        for source, target in zip(
            'ZKGQÇÑßFWPTÁÀÂÃÅÄÆÉÈÊËIJÌÍÎÏÜÝ§ÚÙÛÔÒÓÕØ',
            'CCCCCNSVVBDAAAAAEEEEEEYYYYYYYYUUUUOOOOÖ',
        )
    }

    # The only characters allowed to appear in a finished code; anything
    # else still present after substitution/translation is dropped.
    _uc_set = set('ABCDLMNORSUVWXYÖ')

    def encode(self, word: str) -> str:
        """Return the Phonem code for a word.

        Parameters
        ----------
        word : str
            The word to transform

        Returns
        -------
        str
            The Phonem value

        Examples
        --------
        >>> pe = Phonem()
        >>> pe.encode('Christopher')
        'CRYSDOVR'
        >>> pe.encode('Niall')
        'NYAL'
        >>> pe.encode('Smith')
        'SMYD'
        >>> pe.encode('Schmidt')
        'CMYD'

        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Upper-case first, then compose to NFC so that accented letters are
        # single code points and can match the keys of ``_trans``.
        word = unicode_normalize('NFC', word.upper())

        # Multi-character rewrites, in the order given by ``_substitutions``.
        for i, j in self._substitutions:
            word = word.replace(i, j)

        # Single-character rewrites.
        word = word.translate(self._trans)

        # ``_delete_consecutive_repeats`` is not defined in this class
        # (presumably inherited from ``_Phonetic``); its result is then
        # filtered down to the characters permitted in a code.
        return ''.join(
            c
            for c in self._delete_consecutive_repeats(word)
            if c in self._uc_set
        )


if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1		# Copyright 2014-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.phonetic._phonem.
18
19	1	Phonem
20		"""
21
22		from unicodedata import normalize as unicode_normalize
23
24	1	from ._phonetic import _Phonetic
25
26		__all__ = ['Phonem']
27
28
29		class Phonem(_Phonetic):
30		"""Phonem.
31	1
32		Phonem is defined in :cite:`Wilde:1988`.
33	1
34		This version is based on the Perl implementation documented at
35	1	:cite:`Wilz:2005`.
36		It includes some enhancements presented in the Java port at
37	1	:cite:`dcm4che:2011`.
38	1
39		Phonem is intended chiefly for German names/words.
40	1
41		.. versionadded:: 0.3.6
42		"""
43	1
44		_substitutions = (
45		('SC', 'C'),
46		('SZ', 'C'),
47		('CZ', 'C'),
48		('TZ', 'C'),
49		('TS', 'C'),
50		('KS', 'X'),
51		('PF', 'V'),
52		('QU', 'KW'),
53		('PH', 'V'),
54		('UE', 'Y'),
55		('AE', 'E'),
56		('OE', 'Ö'),
57		('EI', 'AY'),
58	1	('EY', 'AY'),
59		('EU', 'OY'),
60		('AU', 'A§'),
61		('OU', '§'),
62		)
63
64		_trans = dict(
65		zip(
66		(ord(_) for _ in 'ZKGQÇÑßFWPTÁÀÂÃÅÄÆÉÈÊËIJÌÍÎÏÜÝ§ÚÙÛÔÒÓÕØ'),
		0 ignored issues – show Comprehensibility Best Practice introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Show Similar Issues like this The variable `_` does not seem to be defined. Loading history...
67		'CCCCCNSVVBDAAAAAEEEEEEYYYYYYYYUUUUOOOOÖ',
68		)
69		)
70
71		_uc_set = set('ABCDLMNORSUVWXYÖ')
72
73		def encode(self, word: str) -> str:
74		"""Return the Phonem code for a word.
75
76		Parameters
77		----------
78	1	word : str
79		The word to transform
80
81		Returns
82		-------
83		str
84		The Phonem value
85	1
86		Examples
87	1	--------
88		>>> pe = Phonem()
89		>>> pe.encode('Christopher')
90		'CRYSDOVR'
91		>>> pe.encode('Niall')
92		'NYAL'
93		>>> pe.encode('Smith')
94		'SMYD'
95		>>> pe.encode('Schmidt')
96		'CMYD'
97
98
99		.. versionadded:: 0.1.0
100		.. versionchanged:: 0.3.6
101		Encapsulated in class
102
103		"""
104		word = unicode_normalize('NFC', word.upper())
105		for i, j in self._substitutions:
106		word = word.replace(i, j)
107		word = word.translate(self._trans)
108
109		return ''.join(
110		c
111		for c in self._delete_consecutive_repeats(word)
112		if c in self._uc_set
113		)
114
115
116		if __name__ == '__main__':
117		import doctest
118	1
119		doctest.testmod()
120

chrislit / abydos

Issues (140)

abydos/phonetic/_phonem.py (1 issue)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like