abydos.stemmer._snowball_danish.SnowballDanish.stem() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 03:25 UTC

SnowballDanish.stem() F

↳ Parent: abydos.stemmer._snowball_danish

Complexity

Conditions

Size

Total Lines	99
Code Lines	65

Duplication

Lines	36
Ratio	36.36 %

Code Coverage

Tests	44
CRAP Score	23

Importance

Changes

Metric	Value
cc	23
eloc	65
nop	2
dl	36
loc	99
ccs	44
cts	44
cp	1
crap	23
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.stemmer._snowball_danish.

Snowball Danish stemmer
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize

from six import text_type

from ._snowball import _Snowball

# Names exported by ``from <module> import *``: the stemmer class and its
# function-style wrapper.
__all__ = ['SnowballDanish', 'sb_danish']


class SnowballDanish(_Snowball):
    """Snowball Danish stemmer.

    The Snowball Danish stemmer is defined at:
    http://snowball.tartarus.org/algorithms/danish/stemmer.html

    :py:meth:`stem` is the public entry point. Each numbered step of the
    algorithm lives in its own private helper so that the steps can be read
    (and Step 2 reused from Step 3) independently.
    """

    _vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'å', 'æ', 'ø'}

    # Letters that must precede a final 's' for Step 1 to remove that 's'.
    _s_endings = {
        'a',
        'b',
        'c',
        'd',
        'f',
        'g',
        'h',
        'j',
        'k',
        'l',
        'm',
        'n',
        'o',
        'p',
        'r',
        't',
        'v',
        'y',
        'z',
        'å',
    }

    # Step 1 suffixes grouped by length and ordered longest first, so the
    # first group that matches is also the longest possible match.
    _step1_endings = (
        (7, {'erendes'}),
        (6, {'erende', 'hedens'}),
        (
            5,
            {
                'ethed',
                'erede',
                'heden',
                'heder',
                'endes',
                'ernes',
                'erens',
                'erets',
            },
        ),
        (
            4,
            {
                'ered',
                'ende',
                'erne',
                'eren',
                'erer',
                'heds',
                'enes',
                'eres',
                'eret',
            },
        ),
        (3, {'hed', 'ene', 'ere', 'ens', 'ers', 'ets'}),
        (2, {'en', 'er', 'es', 'et'}),
        (1, {'e'}),
    )

    # Step 2 endings: when R1 ends in one of these, the last letter is dropped.
    _step2_endings = {'gd', 'dt', 'gt', 'kt'}

    def _step1_main_suffixes(self, word, r1_start):
        """Apply Step 1: remove the longest listed suffix found in R1.

        Args:
            word (str): The lowercased, NFC-normalized word
            r1_start (int): Index in ``word`` at which R1 begins

        Returns:
            str: ``word`` with at most one Step 1 suffix removed

        """
        r1 = word[r1_start:]
        for length, endings in self._step1_endings:
            # A slice shorter than ``length`` can never equal one of the
            # fixed-length endings, so short R1 regions simply fall through.
            if r1[-length:] in endings:
                return word[:-length]

        # The final 's' must be in R1, but the letter before it is looked up
        # in the whole word, not only in R1.
        if r1[-1:] == 's' and len(word) > 1 and word[-2] in self._s_endings:
            return word[:-1]
        return word

    def _step2_consonant_pairs(self, word, r1_start):
        """Apply Step 2: drop the last letter if R1 ends gd, dt, gt or kt.

        Args:
            word (str): The word after the preceding step
            r1_start (int): Index in ``word`` at which R1 begins

        Returns:
            str: ``word``, shortened by one letter when the rule applies

        """
        if word[r1_start:][-2:] in self._step2_endings:
            return word[:-1]
        return word

    def _step3_other_suffixes(self, word, r1_start):
        """Apply Step 3: handle igst, then elig/lig/els/ig and løst in R1.

        Args:
            word (str): The word after Step 2
            r1_start (int): Index in ``word`` at which R1 begins

        Returns:
            str: ``word`` after Step 3 (including any repeat of Step 2)

        """
        # 'igst' is tested against the whole word, not just R1; only the
        # trailing 'st' is removed.
        if word[-4:] == 'igst':
            word = word[:-2]

        r1 = word[r1_start:]
        if r1[-4:] == 'elig':
            word = word[:-4]
        elif r1[-4:] == 'løst':
            # 'løst' -> 'løs'; this branch does not repeat Step 2.
            return word[:-1]
        elif r1[-3:] in {'lig', 'els'}:
            word = word[:-3]
        elif r1[-2:] == 'ig':
            word = word[:-2]
        else:
            return word

        # Every deleting branch above is followed by a repeat of Step 2.
        return self._step2_consonant_pairs(word, r1_start)

    def _step4_undouble(self, word, r1_start):
        """Apply Step 4: remove one letter of a final doubled non-vowel.

        Args:
            word (str): The word after Step 3
            r1_start (int): Index in ``word`` at which R1 begins

        Returns:
            str: ``word``, shortened by one letter when the rule applies

        """
        if (
            len(word[r1_start:]) >= 1
            and len(word) >= 2
            and word[-1] == word[-2]
            and word[-1] not in self._vowels
        ):
            return word[:-1]
        return word

    def stem(self, word):
        """Return Snowball Danish stem.

        Args:
            word (str): The word to stem

        Returns:
            str: Word stem

        Examples:
            >>> stmr = SnowballDanish()
            >>> stmr.stem('underviser')
            'undervis'
            >>> stmr.stem('suspension')
            'suspension'
            >>> stmr.stem('sikkerhed')
            'sikker'

        """
        # lowercase, normalize, and compose
        word = normalize('NFC', text_type(word.lower()))

        # R1 is forced to start no earlier than index 3 and no later than the
        # end of the word. The offset is computed once and reused by every
        # step, even after the word has been shortened.
        r1_start = min(max(3, self._sb_r1(word)), len(word))

        word = self._step1_main_suffixes(word, r1_start)
        word = self._step2_consonant_pairs(word, r1_start)
        word = self._step3_other_suffixes(word, r1_start)
        return self._step4_undouble(word, r1_start)


def sb_danish(word):
    """Stem a Danish word with the Snowball Danish stemmer.

    Convenience function: it creates a fresh :py:class:`SnowballDanish`
    instance and delegates to :py:meth:`SnowballDanish.stem`.

    Args:
        word (str): The word to stem

    Returns:
        str: Word stem

    Examples:
        >>> sb_danish('underviser')
        'undervis'
        >>> sb_danish('suspension')
        'suspension'
        >>> sb_danish('sikkerhed')
        'sikker'

    """
    stemmer = SnowballDanish()
    return stemmer.stem(word)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1			# -*- coding: utf-8 -*-
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1		"""abydos.stemmer._snowball_danish.
20
21			Snowball Danish stemmer
22			"""
23
24	1		from __future__ import (
25			absolute_import,
26			division,
27			print_function,
28			unicode_literals,
29			)
30
31	1		from unicodedata import normalize
32
33	1		from six import text_type
34
35	1		from ._snowball import _Snowball
36
37	1		__all__ = ['SnowballDanish', 'sb_danish']
38
39
40	1		class SnowballDanish(_Snowball):
			0 ignored issues – show Unused Code introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41			"""Snowball Danish stemmer.
42
43			The Snowball Danish stemmer is defined at:
44			http://snowball.tartarus.org/algorithms/danish/stemmer.html
45			"""
46
47	1		_vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'å', 'æ', 'ø'}
48	1		_s_endings = {
49			'a',
50			'b',
51			'c',
52			'd',
53			'f',
54			'g',
55			'h',
56			'j',
57			'k',
58			'l',
59			'm',
60			'n',
61			'o',
62			'p',
63			'r',
64			't',
65			'v',
66			'y',
67			'z',
68			'å',
69			}
70
71	1		def stem(self, word):
			0 ignored issues – show Bug introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'stem' method Loading history...
72			"""Return Snowball Danish stem.
73
74			Args:
75			word (str): The word to stem
76
77			Returns:
78			str: Word stem
79
80			Examples:
81			>>> stmr = SnowballDanish()
82			>>> stmr.stem('underviser')
83			'undervis'
84			>>> stmr.stem('suspension')
85			'suspension'
86			>>> stmr.stem('sikkerhed')
87			'sikker'
88
89			"""
90			# lowercase, normalize, and compose
91	1		word = normalize('NFC', text_type(word.lower()))
92
93	1		r1_start = min(max(3, self._sb_r1(word)), len(word))
94
95			# Step 1
96	1		_r1 = word[r1_start:]
97	1	View Code Duplication	if _r1[-7:] == 'erendes':
			0 ignored issues – show Duplication introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
98	1		word = word[:-7]
99	1		elif _r1[-6:] in {'erende', 'hedens'}:
100	1		word = word[:-6]
101	1		elif _r1[-5:] in {
102			'ethed',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
103			'erede',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
104			'heden',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
105			'heder',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
106			'endes',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
107			'ernes',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
108			'erens',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
109			'erets',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
110			}:
111	1		word = word[:-5]
112	1		elif _r1[-4:] in {
113			'ered',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
114			'ende',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
115			'erne',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
116			'eren',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
117			'erer',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
118			'heds',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
119			'enes',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
120			'eres',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
121			'eret',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122			}:
123	1		word = word[:-4]
124	1		elif _r1[-3:] in {'hed', 'ene', 'ere', 'ens', 'ers', 'ets'}:
125	1		word = word[:-3]
126	1		elif _r1[-2:] in {'en', 'er', 'es', 'et'}:
127	1		word = word[:-2]
128	1		elif _r1[-1:] == 'e':
129	1		word = word[:-1]
130	1		elif _r1[-1:] == 's':
131	1		if len(word) > 1 and word[-2] in self._s_endings:
132	1		word = word[:-1]
133
134			# Step 2
135	1		if word[r1_start:][-2:] in {'gd', 'dt', 'gt', 'kt'}:
136	1		word = word[:-1]
137
138			# Step 3
139	1		if word[-4:] == 'igst':
140	1		word = word[:-2]
141
142	1		_r1 = word[r1_start:]
143	1		repeat_step2 = False
144	1		if _r1[-4:] == 'elig':
145	1		word = word[:-4]
146	1		repeat_step2 = True
147	1		elif _r1[-4:] == 'løst':
148	1		word = word[:-1]
149	1		elif _r1[-3:] in {'lig', 'els'}:
150	1		word = word[:-3]
151	1		repeat_step2 = True
152	1		elif _r1[-2:] == 'ig':
153	1		word = word[:-2]
154	1		repeat_step2 = True
155
156	1		if repeat_step2:
157	1		if word[r1_start:][-2:] in {'gd', 'dt', 'gt', 'kt'}:
158	1		word = word[:-1]
159
160			# Step 4
161	1		if (
162			len(word[r1_start:]) >= 1
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
163			and len(word) >= 2
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
164			and word[-1] == word[-2]
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
165			and word[-1] not in self._vowels
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
166			):
167	1		word = word[:-1]
168
169	1		return word
170
171
172	1		def sb_danish(word):
173			"""Return Snowball Danish stem.
174
175			This is a wrapper for :py:meth:`SnowballDanish.stem`.
176
177			Args:
178			word (str): The word to stem
179
180			Returns:
181			str: Word stem
182
183			Examples:
184			>>> sb_danish('underviser')
185			'undervis'
186			>>> sb_danish('suspension')
187			'suspension'
188			>>> sb_danish('sikkerhed')
189			'sikker'
190
191			"""
192	1		return SnowballDanish().stem(word)
193
194
195			if __name__ == '__main__':
196			import doctest
197
198			doctest.testmod()
199

chrislit / abydos

Pull Request — master (#141)

SnowballDanish.stem() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like