Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.stemmer._clef_german_plus   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 140
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 39
dl 0
loc 140
ccs 27
cts 27
cp 1
rs 10
c 0
b 0
f 0
wmc 16

1 Function

Rating   Name   Duplication   Size   Complexity  
A clef_german_plus() 0 27 1

1 Method

Rating   Name   Duplication   Size   Complexity  
F CLEFGermanPlus.stem() 0 53 15
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.stemmer._clef_german_plus.
20
21
CLEF German plus stemmer
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize
32
33 1
from six import text_type
34
35 1
from ._stemmer import _Stemmer
36
37 1
__all__ = ['CLEFGermanPlus', 'clef_german_plus']
38
39
40 1
class CLEFGermanPlus(_Stemmer):
    """CLEF German stemmer plus.

    The CLEF German stemmer plus is defined at :cite:`Savoy:2005`.

    Stemming lowercases the word, folds accented vowels to their base
    letter, and then removes at most one suffix in each of two steps.
    """

    # Letters after which a trailing 's' (Step 1) or 'st' (Step 2) counts as
    # a removable suffix.
    _st_ending = {'b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't'}

    # Translation table (code point -> replacement letter) for
    # ``str.translate``: maps umlauted and accented vowels to the bare vowel.
    _accents = dict(
        zip((ord(char) for char in 'äàáâöòóôïìíîüùúû'), 'aaaaooooiiiiuuuu')
    )

    # NOTE(review): static analysis reported "Parameters differ from
    # overridden 'stem' method". The base class ``_Stemmer`` is not visible
    # from this module, so the signature is left as-is -- confirm against
    # ``_Stemmer.stem`` before changing it.
    def stem(self, word):
        """Return 'CLEF German stemmer plus' stem.

        Parameters
        ----------
        word : str
            The word to stem

        Returns
        -------
        str
            Word stem

        Examples
        --------
        >>> stmr = CLEFGermanPlus()
        >>> stmr.stem('lesen')
        'les'
        >>> stmr.stem('graues')
        'grau'
        >>> stmr.stem('buchstabieren')
        'buchstabi'

        """
        # lowercase, normalize, and compose
        word = normalize('NFC', text_type(word.lower()))

        # fold umlauted/accented vowels to their base vowel
        word = word.translate(self._accents)

        # Step 1: strip one inflectional ending. ``wlen`` is the index of
        # the last character (length - 1); each rule has a minimum length
        # guard so that short words are left untouched.
        wlen = len(word) - 1
        if wlen > 4 and word[-3:] == 'ern':
            word = word[:-3]
        elif wlen > 3 and word[-2:] in {'em', 'en', 'er', 'es'}:
            word = word[:-2]
        elif wlen > 2 and (
            word[-1] == 'e'
            or (word[-1] == 's' and word[-2] in self._st_ending)
        ):
            word = word[:-1]

        # Step 2: strip one further ending from the result of Step 1, so
        # the length is recomputed first.
        wlen = len(word) - 1
        if wlen > 4 and word[-3:] == 'est':
            word = word[:-3]
        elif wlen > 3 and (
            word[-2:] in {'er', 'en'}
            or (word[-2:] == 'st' and word[-3] in self._st_ending)
        ):
            word = word[:-2]

        return word
105
106
107 1
def clef_german_plus(word):
    """Return 'CLEF German stemmer plus' stem.

    This is a wrapper for :py:meth:`CLEFGermanPlus.stem`; a new
    :py:class:`CLEFGermanPlus` instance is created on every call.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> clef_german_plus('lesen')
    'les'
    >>> clef_german_plus('graues')
    'grau'
    >>> clef_german_plus('buchstabieren')
    'buchstabi'

    """
    return CLEFGermanPlus().stem(word)
134
135
136
if __name__ == '__main__':
    # Running the module directly executes the docstring examples above.
    import doctest

    doctest.testmod()
140