abydos.stemmer._clef_german_plus.clef_german_plus() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

clef_german_plus() A

↳ Parent: abydos.stemmer._clef_german_plus

Complexity

Conditions

Size

Total Lines	27
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	1
dl	0
loc	27
ccs	2
cts	2
cp	1
crap	1
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.stemmer._clef_german_plus.

CLEF German plus stemmer
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize

from six import text_type

from ._stemmer import _Stemmer

__all__ = ['CLEFGermanPlus', 'clef_german_plus']


class CLEFGermanPlus(_Stemmer):
    """CLEF German stemmer plus.

    Implements the "plus" variant of the CLEF German stemmer, which is
    defined at :cite:`Savoy:2005`.
    """

    # Consonants after which a trailing 's' (step 1) or 'st' (step 2) is
    # treated as a removable suffix.
    _st_ending = set('bdfghklmnt')

    # Translation table for ``translate``: code point of each accented
    # vowel -> its unaccented counterpart.
    _accents = {
        ord(accented): plain
        for accented, plain in zip('äàáâöòóôïìíîüùúû', 'aaaaooooiiiiuuuu')
    }

    def stem(self, word):
        """Return the 'CLEF German stemmer plus' stem of a word.

        The word is lowercased, NFC-normalized and stripped of the accents
        listed in ``_accents``; afterwards at most one suffix is removed
        in each of two successive steps.

        Parameters
        ----------
        word : str
            The word to stem

        Returns
        -------
        str
            Word stem

        Examples
        --------
        >>> stmr = CLEFGermanPlus()
        >>> stmr.stem('lesen')
        'les'
        >>> stmr.stem('graues')
        'grau'
        >>> stmr.stem('buchstabieren')
        'buchstabi'

        """
        # Lowercase, coerce to text and compose, so that every accented
        # vowel is a single code point the translation table can match.
        text = normalize('NFC', text_type(word.lower()))

        # Replace accented vowels (umlauts included) by the bare vowel.
        text = text.translate(self._accents)

        # Step 1: the longer the suffix, the longer the word has to be.
        size = len(text)
        if size > 5 and text.endswith('ern'):
            text = text[:-3]
        elif size > 4 and text.endswith(('em', 'en', 'er', 'es')):
            text = text[:-2]
        elif size > 3:
            # A final 's' only counts when it follows a listed consonant.
            if text[-1] == 'e' or (
                text[-1] == 's' and text[-2] in self._st_ending
            ):
                text = text[:-1]

        # Step 2: works on whatever step 1 left behind.
        size = len(text)
        if size > 5 and text.endswith('est'):
            text = text[:-3]
        elif size > 4:
            # A final 'st' only counts when it follows a listed consonant.
            if text.endswith(('er', 'en')) or (
                text.endswith('st') and text[-3] in self._st_ending
            ):
                text = text[:-2]

        return text


def clef_german_plus(word):
    """Return the 'CLEF German stemmer plus' stem of a word.

    Convenience wrapper: creates a :py:class:`CLEFGermanPlus` instance and
    delegates to :py:meth:`CLEFGermanPlus.stem`.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> clef_german_plus('lesen')
    'les'
    >>> clef_german_plus('graues')
    'grau'
    >>> clef_german_plus('buchstabieren')
    'buchstabi'

    """
    stemmer = CLEFGermanPlus()
    return stemmer.stem(word)


if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.stemmer._clef_german_plus.
20
21		CLEF German plus stemmer
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize
32
33	1	from six import text_type
34
35	1	from ._stemmer import _Stemmer
36
37	1	__all__ = ['CLEFGermanPlus', 'clef_german_plus']
38
39
40	1	class CLEFGermanPlus(_Stemmer):
		0 ignored issues – show Unused Code introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""CLEF German stemmer plus.
42
43		The CLEF German stemmer plus is defined at :cite:`Savoy:2005`.
44		"""
45
46	1	_st_ending = {'b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't'}
47
48	1	_accents = dict(
49		zip((ord(_) for _ in 'äàáâöòóôïìíîüùúû'), 'aaaaooooiiiiuuuu')
		0 ignored issues – show Comprehensibility Best Practice introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report The variable `_` does not seem to be defined. Loading history...
50		)
51
52	1	def stem(self, word):
		0 ignored issues – show Bug introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'stem' method Loading history...
53		"""Return 'CLEF German stemmer plus' stem.
54
55		Parameters
56		----------
57		word : str
58		The word to stem
59
60		Returns
61		-------
62		str
63		Word stem
64
65		Examples
66		--------
67		>>> stmr = CLEFGermanPlus()
68		>>> clef_german_plus('lesen')
69		'les'
70		>>> clef_german_plus('graues')
71		'grau'
72		>>> clef_german_plus('buchstabieren')
73		'buchstabi'
74
75		"""
76		# lowercase, normalize, and compose
77	1	word = normalize('NFC', text_type(word.lower()))
78
79		# remove umlauts
80	1	word = word.translate(self._accents)
81
82		# Step 1
83	1	wlen = len(word) - 1
84	1	if wlen > 4 and word[-3:] == 'ern':
85	1	word = word[:-3]
86	1	elif wlen > 3 and word[-2:] in {'em', 'en', 'er', 'es'}:
87	1	word = word[:-2]
88	1	elif wlen > 2 and (
89		word[-1] == 'e'
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
90		or (word[-1] == 's' and word[-2] in self._st_ending)
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
91		):
92	1	word = word[:-1]
93
94		# Step 2
95	1	wlen = len(word) - 1
96	1	if wlen > 4 and word[-3:] == 'est':
97	1	word = word[:-3]
98	1	elif wlen > 3 and (
99		word[-2:] in {'er', 'en'}
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
100		or (word[-2:] == 'st' and word[-3] in self._st_ending)
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
101		):
102	1	word = word[:-2]
103
104	1	return word
105
106
107	1	def clef_german_plus(word):
108		"""Return 'CLEF German stemmer plus' stem.
109
110		This is a wrapper for :py:meth:`CLEFGermanPlus.stem`.
111
112		Parameters
113		----------
114		word : str
115		The word to stem
116
117		Returns
118		-------
119		str
120		Word stem
121
122		Examples
123		--------
124		>>> stmr = CLEFGermanPlus()
125		>>> clef_german_plus('lesen')
126		'les'
127		>>> clef_german_plus('graues')
128		'grau'
129		>>> clef_german_plus('buchstabieren')
130		'buchstabi'
131
132		"""
133	1	return CLEFGermanPlus().stem(word)
134
135
136		if __name__ == '__main__':
137		import doctest
138
139		doctest.testmod()
140

chrislit / abydos

Push — master ( f43547...71985b )

clef_german_plus() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like