abydos.stemmer._snowball_swedish.SnowballSwedish.stem() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

SnowballSwedish.stem() F

↳ Parent: abydos.stemmer._snowball_swedish

Complexity

Conditions

Size

Total Lines	85
Code Lines	51

Duplication

Lines	37
Ratio	43.53 %

Code Coverage

Tests	33
CRAP Score	16

Importance

Changes

Metric	Value
cc	16
eloc	51
nop	2
dl	37
loc	85
ccs	33
cts	33
cp	1
crap	16
rs	2.4
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.stemmer._snowball_swedish.

Snowball Swedish stemmer
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize

from six import text_type

from ._snowball import _Snowball

__all__ = ['SnowballSwedish', 'sb_swedish']


class SnowballSwedish(_Snowball):
    """Stemmer for Swedish following the Snowball algorithm.

    The algorithm implemented here is specified at:
    http://snowball.tartarus.org/algorithms/swedish/stemmer.html
    """

    # Letters treated as vowels (presumably consulted by the inherited
    # ``_sb_r1`` when locating R1 -- confirm against ``_Snowball``).
    _vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'ä', 'å', 'ö'}
    # Letters after which a word-final 's' may be removed in step 1.
    _s_endings = set('bcdfghjklmnoprtvy')

    def stem(self, word):
        """Return the Snowball Swedish stem of a word.

        The word is lowercased and NFC-normalized, after which three
        suffix-stripping steps are applied in sequence. Every suffix test
        is restricted to the R1 region of the word.

        Parameters
        ----------
        word : str
            The word to stem

        Returns
        -------
        str
            Word stem

        Examples
        --------
        >>> stmr = SnowballSwedish()
        >>> stmr.stem('undervisa')
        'undervis'
        >>> stmr.stem('suspension')
        'suspension'
        >>> stmr.stem('visshet')
        'viss'

        """
        # lowercase, normalize, and compose
        word = normalize('NFC', text_type(word.lower()))

        # R1 begins no earlier than index 3 and no later than the end of
        # the word.
        r1_start = max(3, self._sb_r1(word))
        r1_start = min(r1_start, len(word))

        # Step 1: delete the first matching suffix found in R1. Suffixes
        # are listed longest first, so the first hit is the longest match.
        step1_suffixes = (
            'heterna',
            'hetens',
            'anden', 'heten', 'heter', 'arnas', 'ernas',
            'ornas', 'andes', 'arens', 'andet',
            'arna', 'erna', 'orna', 'ande', 'arne',
            'aste', 'aren', 'ades', 'erns',
            'ade', 'are', 'ern', 'ens', 'het', 'ast',
            'ad', 'en', 'ar', 'er', 'or', 'as', 'es', 'at',
            'a', 'e',
        )
        region = word[r1_start:]
        for suffix in step1_suffixes:
            if region.endswith(suffix):
                word = word[: -len(suffix)]
                break
        else:
            # No listed suffix matched: a final 's' in R1 is dropped only
            # when the letter before it (anywhere in the word) is one of
            # the permitted s-endings.
            if (
                region.endswith('s')
                and len(word) > 1
                and word[-2] in self._s_endings
            ):
                word = word[:-1]

        # Step 2: for these two-letter endings in R1, drop the last letter.
        if word[r1_start:].endswith(
            ('dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt')
        ):
            word = word[:-1]

        # Step 3: 'fullt'/'löst' lose their final 't'; 'lig', 'els' and
        # 'ig' are removed outright.
        region = word[r1_start:]
        if region.endswith(('fullt', 'löst')):
            word = word[:-1]
        elif region.endswith(('lig', 'els')):
            word = word[:-3]
        elif region.endswith('ig'):
            word = word[:-2]

        return word


def sb_swedish(word):
    """Stem a word with the Snowball Swedish stemmer.

    Convenience function that builds a new :py:class:`SnowballSwedish`
    instance and delegates to :py:meth:`SnowballSwedish.stem`.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> sb_swedish('undervisa')
    'undervis'
    >>> sb_swedish('suspension')
    'suspension'
    >>> sb_swedish('visshet')
    'viss'

    """
    stemmer = SnowballSwedish()
    return stemmer.stem(word)


# When executed directly as a script, run the doctest examples embedded in
# this module's docstrings.
if __name__ == '__main__':
    import doctest

    doctest.testmod()


1			# -*- coding: utf-8 -*-
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1		"""abydos.stemmer._snowball_swedish.
20
21			Snowball Swedish stemmer
22			"""
23
24	1		from __future__ import (
25			absolute_import,
26			division,
27			print_function,
28			unicode_literals,
29			)
30
31	1		from unicodedata import normalize
32
33	1		from six import text_type
34
35	1		from ._snowball import _Snowball
36
37	1		__all__ = ['SnowballSwedish', 'sb_swedish']
38
39
40	1		class SnowballSwedish(_Snowball):
			0 ignored issues – show Unused Code introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41			"""Snowball Swedish stemmer.
42
43			The Snowball Swedish stemmer is defined at:
44			http://snowball.tartarus.org/algorithms/swedish/stemmer.html
45			"""
46
47	1		_vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'ä', 'å', 'ö'}
48	1		_s_endings = {
49			'b',
50			'c',
51			'd',
52			'f',
53			'g',
54			'h',
55			'j',
56			'k',
57			'l',
58			'm',
59			'n',
60			'o',
61			'p',
62			'r',
63			't',
64			'v',
65			'y',
66			}
67
68	1		def stem(self, word):
			0 ignored issues – show Bug introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'stem' method Loading history...
69			"""Return Snowball Swedish stem.
70
71			Parameters
72			----------
73			word : str
74			The word to stem
75
76			Returns
77			-------
78			str
79			Word stem
80
81			Examples
82			--------
83			>>> stmr = SnowballSwedish()
84			>>> stmr.stem('undervisa')
85			'undervis'
86			>>> stmr.stem('suspension')
87			'suspension'
88			>>> stmr.stem('visshet')
89			'viss'
90
91			"""
92			# lowercase, normalize, and compose
93	1		word = normalize('NFC', text_type(word.lower()))
94
95	1		r1_start = min(max(3, self._sb_r1(word)), len(word))
96
97			# Step 1
98	1		_r1 = word[r1_start:]
99	1	View Code Duplication	if _r1[-7:] == 'heterna':
			0 ignored issues – show Duplication introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
100	1		word = word[:-7]
101	1		elif _r1[-6:] == 'hetens':
102	1		word = word[:-6]
103	1		elif _r1[-5:] in {
104			'anden',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
105			'heten',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
106			'heter',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
107			'arnas',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
108			'ernas',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
109			'ornas',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
110			'andes',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
111			'arens',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
112			'andet',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
113			}:
114	1		word = word[:-5]
115	1		elif _r1[-4:] in {
116			'arna',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
117			'erna',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
118			'orna',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
119			'ande',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
120			'arne',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
121			'aste',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122			'aren',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123			'ades',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124			'erns',
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
125			}:
126	1		word = word[:-4]
127	1		elif _r1[-3:] in {'ade', 'are', 'ern', 'ens', 'het', 'ast'}:
128	1		word = word[:-3]
129	1		elif _r1[-2:] in {'ad', 'en', 'ar', 'er', 'or', 'as', 'es', 'at'}:
130	1		word = word[:-2]
131	1		elif _r1[-1:] in {'a', 'e'}:
132	1		word = word[:-1]
133	1		elif _r1[-1:] == 's':
134	1		if len(word) > 1 and word[-2] in self._s_endings:
135	1		word = word[:-1]
136
137			# Step 2
138	1		if word[r1_start:][-2:] in {'dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt'}:
139	1		word = word[:-1]
140
141			# Step 3
142	1		_r1 = word[r1_start:]
143	1		if _r1[-5:] == 'fullt':
144	1		word = word[:-1]
145	1		elif _r1[-4:] == 'löst':
146	1		word = word[:-1]
147	1		elif _r1[-3:] in {'lig', 'els'}:
148	1		word = word[:-3]
149	1		elif _r1[-2:] == 'ig':
150	1		word = word[:-2]
151
152	1		return word
153
154
155	1		def sb_swedish(word):
156			"""Return Snowball Swedish stem.
157
158			This is a wrapper for :py:meth:`SnowballSwedish.stem`.
159
160			Parameters
161			----------
162			word : str
163			The word to stem
164
165			Returns
166			-------
167			str
168			Word stem
169
170			Examples
171			--------
172			>>> sb_swedish('undervisa')
173			'undervis'
174			>>> sb_swedish('suspension')
175			'suspension'
176			>>> sb_swedish('visshet')
177			'viss'
178
179			"""
180	1		return SnowballSwedish().stem(word)
181
182
183			if __name__ == '__main__':
184			import doctest
185
186			doctest.testmod()
187

chrislit / abydos

Push — master ( f43547...71985b )

SnowballSwedish.stem() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like