abydos.stemmer._snowball_norwegian.SnowballNorwegian.stem() - Code Metrics - Inspection of "Merge pull request #149 from chrislit/0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f43547...71985b )

by Chris

created 2018-11-17 08:52 UTC

SnowballNorwegian.stem() F

↳ Parent: abydos.stemmer._snowball_norwegian

Complexity

Conditions

Size

Total Lines	85
Code Lines	49

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	37
CRAP Score	21

Importance

Changes

Metric	Value
cc	21
eloc	49
nop	2
dl	0
loc	85
ccs	37
cts	37
cp	1
crap	21
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.stemmer._snowball_norwegian.

Snowball Norwegian stemmer
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize

from six import text_type

from ._snowball import _Snowball

__all__ = ['SnowballNorwegian', 'sb_norwegian']


class SnowballNorwegian(_Snowball):
    """Snowball Norwegian stemmer.

    The Snowball Norwegian stemmer is defined at:
    http://snowball.tartarus.org/algorithms/norwegian/stemmer.html

    The suffix inventories are stored as class-level tuples ordered from
    longest to shortest, so :meth:`stem` only needs to take the first entry
    that matches inside R1 rather than walk a long ``if``/``elif`` chain.
    """

    _vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'å', 'æ', 'ø'}
    # Letters after which a word-final 's' may be dropped in step 1.
    _s_endings = {
        'b',
        'c',
        'd',
        'f',
        'g',
        'h',
        'j',
        'l',
        'm',
        'n',
        'o',
        'p',
        'r',
        't',
        'v',
        'y',
        'z',
    }

    # Step 1 endings, longest first so that the first hit is the longest
    # ending lying entirely inside R1.
    _step1_endings = (
        'hetenes',
        'hetene',
        'hetens',
        'heten',
        'heter',
        'endes',
        'ande',
        'ende',
        'edes',
        'enes',
        'erte',
        'ede',
        'ane',
        'ene',
        'ens',
        'ers',
        'ets',
        'het',
        'ast',
        'ert',
        'en',
        'ar',
        'er',
        'as',
        'es',
        'et',
        'a',
        'e',
    )
    # Step 1 endings that are rewritten instead of being deleted outright.
    _step1_replacements = {'erte': 'er', 'ert': 'er'}

    # Step 3 endings, longest first; every one of them is simply deleted.
    _step3_endings = (
        'hetslov',
        'eleg',
        'elig',
        'elov',
        'slov',
        'leg',
        'eig',
        'lig',
        'els',
        'lov',
        'ig',
    )

    @staticmethod
    def _longest_ending(region, endings):
        """Return the first entry of ``endings`` that ``region`` ends with.

        Parameters
        ----------
        region : str
            The part of the word in which the ending must lie completely
        endings : tuple of str
            Candidate endings, ordered from longest to shortest

        Returns
        -------
        str
            The matching ending, or the empty string if none matches

        """
        for ending in endings:
            if region.endswith(ending):
                return ending
        return ''

    def _s_is_removable(self, word):
        """Return True if the letters before a final 's' allow dropping it.

        The 's' may go when the preceding letter is one of ``_s_endings``,
        or when it is a 'k' that is itself not preceded by a vowel.

        Parameters
        ----------
        word : str
            A word ending in 's'

        Returns
        -------
        bool
            Whether the final 's' may be removed

        """
        if len(word) < 2:
            return False
        before_s = word[-2]
        if before_s in self._s_endings:
            return True
        return (
            before_s == 'k' and len(word) > 2 and word[-3] not in self._vowels
        )

    def stem(self, word):
        """Return Snowball Norwegian stem.

        Parameters
        ----------
        word : str
            The word to stem

        Returns
        -------
        str
            Word stem

        Examples
        --------
        >>> stmr = SnowballNorwegian()
        >>> stmr.stem('lese')
        'les'
        >>> stmr.stem('suspensjon')
        'suspensjon'
        >>> stmr.stem('sikkerhet')
        'sikker'

        """
        # lowercase, normalize, and compose
        word = normalize('NFC', text_type(word.lower()))

        # R1 as reported by self._sb_r1, pushed right so that at least three
        # characters precede it, and clamped to the length of the word.
        r1_start = min(max(3, self._sb_r1(word)), len(word))

        # Step 1: remove (or rewrite) the longest listed ending inside R1;
        # failing that, drop a final 's' when the preceding letters allow it.
        ending = self._longest_ending(word[r1_start:], self._step1_endings)
        if ending:
            replacement = self._step1_replacements.get(ending, '')
            word = word[: -len(ending)] + replacement
        elif word[r1_start:].endswith('s') and self._s_is_removable(word):
            word = word[:-1]

        # Step 2: a 'dt' or 'vt' at the end of R1 loses its 't'.
        if word[r1_start:].endswith(('dt', 'vt')):
            word = word[:-1]

        # Step 3: remove the longest listed derivational ending inside R1.
        # R1 is re-sliced because the earlier steps may have shortened word.
        ending = self._longest_ending(word[r1_start:], self._step3_endings)
        if ending:
            word = word[: -len(ending)]

        return word


def sb_norwegian(word):
    """Stem a word with the Snowball Norwegian stemmer.

    Convenience wrapper that builds a fresh :py:class:`SnowballNorwegian`
    and delegates to :py:meth:`SnowballNorwegian.stem`.

    Parameters
    ----------
    word : str
        The word to stem

    Returns
    -------
    str
        Word stem

    Examples
    --------
    >>> sb_norwegian('lese')
    'les'
    >>> sb_norwegian('suspensjon')
    'suspensjon'
    >>> sb_norwegian('sikkerhet')
    'sikker'

    """
    stemmer = SnowballNorwegian()
    return stemmer.stem(word)


if __name__ == '__main__':
    # Run as a script: execute the doctest examples embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.stemmer._snowball_norwegian.
20
21		Snowball Norwegian stemmer
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize
32
33	1	from six import text_type
34
35	1	from ._snowball import _Snowball
36
37	1	__all__ = ['SnowballNorwegian', 'sb_norwegian']
38
39
40	1	class SnowballNorwegian(_Snowball):
		0 ignored issues – show Unused Code introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
41		"""Snowball Norwegian stemmer.
42
43		The Snowball Norwegian stemmer is defined at:
44		http://snowball.tartarus.org/algorithms/norwegian/stemmer.html
45		"""
46
47	1	_vowels = {'a', 'e', 'i', 'o', 'u', 'y', 'å', 'æ', 'ø'}
48	1	_s_endings = {
49		'b',
50		'c',
51		'd',
52		'f',
53		'g',
54		'h',
55		'j',
56		'l',
57		'm',
58		'n',
59		'o',
60		'p',
61		'r',
62		't',
63		'v',
64		'y',
65		'z',
66		}
67
68	1	def stem(self, word):
		0 ignored issues – show Bug introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'stem' method Loading history...
69		"""Return Snowball Norwegian stem.
70
71		Parameters
72		----------
73		word : str
74		The word to stem
75
76		Returns
77		-------
78		str
79		Word stem
80
81		Examples
82		--------
83		>>> stmr = SnowballNorwegian()
84		>>> stmr.stem('lese')
85		'les'
86		>>> stmr.stem('suspensjon')
87		'suspensjon'
88		>>> stmr.stem('sikkerhet')
89		'sikker'
90
91		"""
92		# lowercase, normalize, and compose
93	1	word = normalize('NFC', text_type(word.lower()))
94
95	1	r1_start = min(max(3, self._sb_r1(word)), len(word))
96
97		# Step 1
98	1	_r1 = word[r1_start:]
99	1	if _r1[-7:] == 'hetenes':
100	1	word = word[:-7]
101	1	elif _r1[-6:] in {'hetene', 'hetens'}:
102	1	word = word[:-6]
103	1	elif _r1[-5:] in {'heten', 'heter', 'endes'}:
104	1	word = word[:-5]
105	1	elif _r1[-4:] in {'ande', 'ende', 'edes', 'enes', 'erte'}:
106	1	if word[-4:] == 'erte':
107	1	word = word[:-2]
108		else:
109	1	word = word[:-4]
110	1	elif _r1[-3:] in {
111		'ede',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
112		'ane',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
113		'ene',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
114		'ens',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
115		'ers',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
116		'ets',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
117		'het',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
118		'ast',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
119		'ert',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
120		}:
121	1	if word[-3:] == 'ert':
122	1	word = word[:-1]
123		else:
124	1	word = word[:-3]
125	1	elif _r1[-2:] in {'en', 'ar', 'er', 'as', 'es', 'et'}:
126	1	word = word[:-2]
127	1	elif _r1[-1:] in {'a', 'e'}:
128	1	word = word[:-1]
129	1	elif _r1[-1:] == 's':
130	1	if (len(word) > 1 and word[-2] in self._s_endings) or (
131		len(word) > 2
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
132		and word[-2] == 'k'
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
133		and word[-3] not in self._vowels
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
134		):
135	1	word = word[:-1]
136
137		# Step 2
138	1	if word[r1_start:][-2:] in {'dt', 'vt'}:
139	1	word = word[:-1]
140
141		# Step 3
142	1	_r1 = word[r1_start:]
143	1	if _r1[-7:] == 'hetslov':
144	1	word = word[:-7]
145	1	elif _r1[-4:] in {'eleg', 'elig', 'elov', 'slov'}:
146	1	word = word[:-4]
147	1	elif _r1[-3:] in {'leg', 'eig', 'lig', 'els', 'lov'}:
148	1	word = word[:-3]
149	1	elif _r1[-2:] == 'ig':
150	1	word = word[:-2]
151
152	1	return word
153
154
155	1	def sb_norwegian(word):
156		"""Return Snowball Norwegian stem.
157
158		This is a wrapper for :py:meth:`SnowballNorwegian.stem`.
159
160		Parameters
161		----------
162		word : str
163		The word to stem
164
165		Returns
166		-------
167		str
168		Word stem
169
170		Examples
171		--------
172		>>> sb_norwegian('lese')
173		'les'
174		>>> sb_norwegian('suspensjon')
175		'suspensjon'
176		>>> sb_norwegian('sikkerhet')
177		'sikker'
178
179		"""
180	1	return SnowballNorwegian().stem(word)
181
182
183		if __name__ == '__main__':
184		import doctest
185
186		doctest.testmod()
187

chrislit / abydos

Push — master ( f43547...71985b )

SnowballNorwegian.stem() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like