abydos.stemmer._Porter2.Porter2.stem() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.stemmer._Porter2.Porter2.stem() F

↳ Parent: abydos.stemmer._Porter2

Complexity

Conditions

127

Size

Total Lines	285
Code Lines	218

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	187
CRAP Score	127

Importance

Changes

Metric	Value
cc	127
eloc	218
nop	3
dl	0
loc	285
ccs	187
cts	187
cp	1
crap	127
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-


# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.stemmer._Porter2.

Porter2 (Snowball English) stemmer
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize

from six import text_type
from six.moves import range

from ._Snowball import _Snowball

__all__ = ['Porter2', 'porter2']


class Porter2(_Snowball):
    """Porter2 (Snowball English) stemmer.

    The Porter2 (Snowball English) stemmer is defined in :cite:`Porter:2002`.
    """

    # Doubled consonant endings that Step 1b shortens by one letter
    _doubles = {'bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt'}
    # Letters that must immediately precede -li for Step 2 to delete it
    _li = {'c', 'd', 'e', 'g', 'h', 'k', 'm', 'n', 'r', 't'}

    # R1 prefixes should be in order from longest to shortest to prevent
    # masking
    _r1_prefixes = ('commun', 'gener', 'arsen')
    # Whole words that map directly to a fixed stem (checked before any
    # other processing)
    _exception1dict = {  # special changes:
        'skis': 'ski',
        'skies': 'sky',
        'dying': 'die',
        'lying': 'lie',
        'tying': 'tie',
        # special -LY cases:
        'idly': 'idl',
        'gently': 'gentl',
        'ugly': 'ugli',
        'early': 'earli',
        'only': 'onli',
        'singly': 'singl',
    }
    # Whole words that are returned unchanged (checked before any other
    # processing)
    _exception1set = {
        'sky',
        'news',
        'howe',
        'atlas',
        'cosmos',
        'bias',
        'andes',
    }
    # Words that are returned as-is if they are what remains after Step 1a
    _exception2set = {
        'inning',
        'outing',
        'canning',
        'herring',
        'earring',
        'proceed',
        'exceed',
        'succeed',
    }

    def stem(self, word, early_english=False):
        """Return the Porter2 (Snowball English) stem.

        The word is lowercased and NFC-normalized, apostrophe-like
        characters are mapped to U+0027, and the exception lists are
        consulted; otherwise Steps 0-5 of the algorithm are applied in
        order. Words shorter than three characters are returned without
        stemming.

        Args:
            word (str): The word to stem
            early_english (bool): Set to True in order to remove -eth & -est
                (2nd & 3rd person singular verbal agreement suffixes)

        Returns:
            str: Word stem

        Examples:
            >>> stmr = Porter2()
            >>> stmr.stem('reading')
            'read'
            >>> stmr.stem('suspension')
            'suspens'
            >>> stmr.stem('elusiveness')
            'elus'

            >>> stmr.stem('eateth', early_english=True)
            'eat'

        """
        # lowercase, normalize, and compose
        word = normalize('NFC', text_type(word.lower()))
        # replace apostrophe-like characters (U+2018, U+2019, U+201B) with
        # U+0027, per http://snowball.tartarus.org/texts/apostrophe.html
        for apostrophe in ('\u2018', '\u2019', '\u201b'):
            word = word.replace(apostrophe, '\'')

        # Exceptions 1
        if word in self._exception1dict:
            return self._exception1dict[word]
        elif word in self._exception1set:
            return word

        # Return word if stem is shorter than 3
        if len(word) < 3:
            return word

        # Remove initial ', if present.
        while word and word[0] == '\'':
            word = word[1:]
            # Return word if stem is shorter than 2
            if len(word) < 2:
                return word

        # Re-map vocalic Y to y (Y will be C, y will be V)
        if word[0] == 'y':
            word = 'Y' + word[1:]
        for i in range(1, len(word)):
            if word[i] == 'y' and word[i - 1] in self._vowels:
                word = word[:i] + 'Y' + word[i + 1 :]

        # Start offsets of the R1 and R2 regions, as computed by the
        # inherited Snowball helpers. They are computed once, before any
        # suffix is removed, and every "in R1/R2" test below compares the
        # length of word[rN_start:] with the length of the suffix.
        r1_start = self._sb_r1(word, self._r1_prefixes)
        r2_start = self._sb_r2(word, self._r1_prefixes)

        # Step 0
        if word[-3:] == '\'s\'':
            word = word[:-3]
        elif word[-2:] == '\'s':
            word = word[:-2]
        elif word[-1:] == '\'':
            word = word[:-1]
        # Return word if stem is shorter than 3
        if len(word) < 3:
            return word

        # Step 1a
        if word[-4:] == 'sses':
            word = word[:-2]
        elif word[-3:] in {'ied', 'ies'}:
            # -ied/-ies becomes -i after more than one letter, else -ie
            if len(word) > 4:
                word = word[:-2]
            else:
                word = word[:-1]
        elif word[-2:] in {'us', 'ss'}:
            pass
        elif word[-1] == 's':
            # the vowel must not be the letter immediately before the s
            if self._sb_has_vowel(word[:-2]):
                word = word[:-1]

        # Exceptions 2
        if word in self._exception2set:
            return word

        # Step 1b
        # step1b_flag records that a suffix was deleted outright, which
        # triggers the clean-up rules that follow the chain.
        step1b_flag = False
        if word[-5:] == 'eedly':
            if len(word[r1_start:]) >= 5:
                word = word[:-3]
        elif word[-5:] == 'ingly':
            if self._sb_has_vowel(word[:-5]):
                word = word[:-5]
                step1b_flag = True
        elif word[-4:] == 'edly':
            if self._sb_has_vowel(word[:-4]):
                word = word[:-4]
                step1b_flag = True
        elif word[-3:] == 'eed':
            if len(word[r1_start:]) >= 3:
                word = word[:-1]
        elif word[-3:] == 'ing':
            if self._sb_has_vowel(word[:-3]):
                word = word[:-3]
                step1b_flag = True
        elif word[-2:] == 'ed':
            if self._sb_has_vowel(word[:-2]):
                word = word[:-2]
                step1b_flag = True
        elif early_english:
            if word[-3:] == 'est':
                if self._sb_has_vowel(word[:-3]):
                    word = word[:-3]
                    step1b_flag = True
            elif word[-3:] == 'eth':
                if self._sb_has_vowel(word[:-3]):
                    word = word[:-3]
                    step1b_flag = True

        if step1b_flag:
            if word[-2:] in {'at', 'bl', 'iz'}:
                word += 'e'
            elif word[-2:] in self._doubles:
                word = word[:-1]
            elif self._sb_short_word(word, self._r1_prefixes):
                word += 'e'

        # Step 1c
        if (
            len(word) > 2
            and word[-1] in {'Y', 'y'}
            and word[-2] not in self._vowels
        ):
            word = word[:-1] + 'i'

        # Step 2
        # Step 1b may have cut the word down to a single letter, so only
        # look at the penultimate letter when there is one; indexing
        # word[-2] unconditionally would raise IndexError in that case.
        penult = word[-2] if len(word) > 1 else ''
        if penult == 'a':
            if word[-7:] == 'ational':
                if len(word[r1_start:]) >= 7:
                    word = word[:-5] + 'e'
            elif word[-6:] == 'tional':
                if len(word[r1_start:]) >= 6:
                    word = word[:-2]
        elif penult == 'c':
            if word[-4:] in {'enci', 'anci'}:
                if len(word[r1_start:]) >= 4:
                    word = word[:-1] + 'e'
        elif penult == 'e':
            if word[-4:] == 'izer':
                if len(word[r1_start:]) >= 4:
                    word = word[:-1]
        elif penult == 'g':
            if word[-3:] == 'ogi':
                if (
                    r1_start >= 1
                    and len(word[r1_start:]) >= 3
                    and word[-4] == 'l'
                ):
                    word = word[:-1]
        elif penult == 'l':
            if word[-6:] == 'lessli':
                if len(word[r1_start:]) >= 6:
                    word = word[:-2]
            elif word[-5:] in {'entli', 'fulli', 'ousli'}:
                if len(word[r1_start:]) >= 5:
                    word = word[:-2]
            elif word[-4:] == 'abli':
                if len(word[r1_start:]) >= 4:
                    word = word[:-1] + 'e'
            elif word[-4:] == 'alli':
                if len(word[r1_start:]) >= 4:
                    word = word[:-2]
            elif word[-3:] == 'bli':
                if len(word[r1_start:]) >= 3:
                    word = word[:-1] + 'e'
            elif word[-2:] == 'li':
                if (
                    r1_start >= 1
                    and len(word[r1_start:]) >= 2
                    and word[-3] in self._li
                ):
                    word = word[:-2]
        elif penult == 'o':
            if word[-7:] == 'ization':
                if len(word[r1_start:]) >= 7:
                    word = word[:-5] + 'e'
            elif word[-5:] == 'ation':
                if len(word[r1_start:]) >= 5:
                    word = word[:-3] + 'e'
            elif word[-4:] == 'ator':
                if len(word[r1_start:]) >= 4:
                    word = word[:-2] + 'e'
        elif penult == 's':
            if word[-7:] in {'fulness', 'ousness', 'iveness'}:
                if len(word[r1_start:]) >= 7:
                    word = word[:-4]
            elif word[-5:] == 'alism':
                if len(word[r1_start:]) >= 5:
                    word = word[:-3]
        elif penult == 't':
            if word[-6:] == 'biliti':
                if len(word[r1_start:]) >= 6:
                    word = word[:-5] + 'le'
            elif word[-5:] == 'aliti':
                if len(word[r1_start:]) >= 5:
                    word = word[:-3]
            elif word[-5:] == 'iviti':
                if len(word[r1_start:]) >= 5:
                    word = word[:-3] + 'e'

        # Step 3
        if word[-7:] == 'ational':
            if len(word[r1_start:]) >= 7:
                word = word[:-5] + 'e'
        elif word[-6:] == 'tional':
            if len(word[r1_start:]) >= 6:
                word = word[:-2]
        elif word[-5:] in {'alize', 'icate', 'iciti'}:
            if len(word[r1_start:]) >= 5:
                word = word[:-3]
        elif word[-5:] == 'ative':
            # unlike the other Step 3 suffixes, -ative must lie in R2
            if len(word[r2_start:]) >= 5:
                word = word[:-5]
        elif word[-4:] == 'ical':
            if len(word[r1_start:]) >= 4:
                word = word[:-2]
        elif word[-4:] == 'ness':
            if len(word[r1_start:]) >= 4:
                word = word[:-4]
        elif word[-3:] == 'ful':
            if len(word[r1_start:]) >= 3:
                word = word[:-3]

        # Step 4
        # Suffixes are ordered so that a longer suffix is tried before any
        # shorter suffix it ends with (-ement, then -ment, then -ent). The
        # first suffix that matches ends the search even if it is not
        # removed.
        for suffix in (
            'ement',
            'ance',
            'ence',
            'able',
            'ible',
            'ment',
            'ant',
            'ent',
            'ism',
            'ate',
            'iti',
            'ous',
            'ive',
            'ize',
            'al',
            'er',
            'ic',
        ):
            if word[-len(suffix) :] == suffix:
                if len(word[r2_start:]) >= len(suffix):
                    word = word[: -len(suffix)]
                break
        else:
            # -ion is only removed when preceded by s or t
            if word[-3:] == 'ion':
                if (
                    len(word[r2_start:]) >= 3
                    and len(word) >= 4
                    and word[-4] in tuple('st')
                ):
                    word = word[:-3]

        # Step 5
        if word[-1] == 'e':
            if len(word[r2_start:]) >= 1 or (
                len(word[r1_start:]) >= 1
                and not self._sb_ends_in_short_syllable(word[:-1])
            ):
                word = word[:-1]
        elif word[-1] == 'l':
            if len(word[r2_start:]) >= 1 and word[-2] == 'l':
                word = word[:-1]

        # Change 'Y' back to 'y' if it survived stemming
        return word.replace('Y', 'y')


def porter2(word, early_english=False):
    """Stem a word with the Porter2 (Snowball English) algorithm.

    Convenience function: builds a :py:class:`Porter2` instance and
    delegates to :py:meth:`Porter2.stem`.

    Args:
        word (str): The word to stem
        early_english (bool): Set to True in order to remove -eth & -est (2nd &
            3rd person singular verbal agreement suffixes)

    Returns:
        str: Word stem

    Examples:
        >>> porter2('reading')
        'read'
        >>> porter2('suspension')
        'suspens'
        >>> porter2('elusiveness')
        'elus'

        >>> porter2('eateth', early_english=True)
        'eat'

    """
    stemmer = Porter2()
    return stemmer.stem(word, early_english)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
		0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_Porter2` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.stemmer._Porter2.
20
21		Porter2 (Snowball English) stemmer
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize
32
33	1	from six import text_type
34	1	from six.moves import range
35
36	1	from ._Snowball import _Snowball
37
38	1	__all__ = ['Porter2', 'porter2']
39
40
41	1	class Porter2(_Snowball):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
42		"""Porter2 (Snowball English) stemmer.
43
44		The Porter2 (Snowball English) stemmer is defined in :cite:`Porter:2002`.
45		"""
46
47	1	_doubles = {'bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt'}
48	1	_li = {'c', 'd', 'e', 'g', 'h', 'k', 'm', 'n', 'r', 't'}
49
50		# R1 prefixes should be in order from longest to shortest to prevent
51		# masking
52	1	_r1_prefixes = ('commun', 'gener', 'arsen')
53	1	_exception1dict = { # special changes:
54		'skis': 'ski',
55		'skies': 'sky',
56		'dying': 'die',
57		'lying': 'lie',
58		'tying': 'tie',
59		# special -LY cases:
60		'idly': 'idl',
61		'gently': 'gentl',
62		'ugly': 'ugli',
63		'early': 'earli',
64		'only': 'onli',
65		'singly': 'singl',
66		}
67	1	_exception1set = {
68		'sky',
69		'news',
70		'howe',
71		'atlas',
72		'cosmos',
73		'bias',
74		'andes',
75		}
76	1	_exception2set = {
77		'inning',
78		'outing',
79		'canning',
80		'herring',
81		'earring',
82		'proceed',
83		'exceed',
84		'succeed',
85		}
86
87	1	def stem(self, word, early_english=False):
		0 ignored issues – show Bug introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'stem' method Loading history... best-practice introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Too many return statements (7/6) Loading history...
88		"""Return the Porter2 (Snowball English) stem.
89
90		Args:
91		word (str): The word to stem
92		early_english (bool): Set to True in order to remove -eth & -est
93		(2nd & 3rd person singular verbal agreement suffixes)
94
95		Returns:
96		str: Word stem
97
98		Examples:
99		>>> stmr = Porter2()
100		>>> stmr.stem('reading')
101		'read'
102		>>> stmr.stem('suspension')
103		'suspens'
104		>>> stmr.stem('elusiveness')
105		'elus'
106
107		>>> stmr.stem('eateth', early_english=True)
108		'eat'
109
110		"""
111		# lowercase, normalize, and compose
112	1	word = normalize('NFC', text_type(word.lower()))
113		# replace apostrophe-like characters with U+0027, per
114		# http://snowball.tartarus.org/texts/apostrophe.html
115	1	word = word.replace('’', '\'')
116	1	word = word.replace('’', '\'')
117
118		# Exceptions 1
119	1	if word in self._exception1dict:
120	1	return self._exception1dict[word]
121	1	elif word in self._exception1set:
122	1	return word
123
124		# Return word if stem is shorter than 3
125	1	if len(word) < 3:
126	1	return word
127
128		# Remove initial ', if present.
129	1	while word and word[0] == '\'':
130	1	word = word[1:]
131		# Return word if stem is shorter than 2
132	1	if len(word) < 2:
133	1	return word
134
135		# Re-map vocalic Y to y (Y will be C, y will be V)
136	1	if word[0] == 'y':
137	1	word = 'Y' + word[1:]
138	1	for i in range(1, len(word)):
139	1	if word[i] == 'y' and word[i - 1] in self._vowels:
140	1	word = word[:i] + 'Y' + word[i + 1 :]
141
142	1	r1_start = self._sb_r1(word, self._r1_prefixes)
143	1	r2_start = self._sb_r2(word, self._r1_prefixes)
144
145		# Step 0
146	1	if word[-3:] == '\'s\'':
147	1	word = word[:-3]
148	1	elif word[-2:] == '\'s':
149	1	word = word[:-2]
150	1	elif word[-1:] == '\'':
151	1	word = word[:-1]
152		# Return word if stem is shorter than 3
153	1	if len(word) < 3:
154	1	return word
155
156		# Step 1a
157	1	if word[-4:] == 'sses':
158	1	word = word[:-2]
159	1	elif word[-3:] in {'ied', 'ies'}:
160	1	if len(word) > 4:
161	1	word = word[:-2]
162		else:
163	1	word = word[:-1]
164	1	elif word[-2:] in {'us', 'ss'}:
165	1	pass
166	1	elif word[-1] == 's':
167	1	if self._sb_has_vowel(word[:-2]):
168	1	word = word[:-1]
169
170		# Exceptions 2
171	1	if word in self._exception2set:
172	1	return word
173
174		# Step 1b
175	1	step1b_flag = False
176	1	if word[-5:] == 'eedly':
177	1	if len(word[r1_start:]) >= 5:
178	1	word = word[:-3]
179	1	elif word[-5:] == 'ingly':
180	1	if self._sb_has_vowel(word[:-5]):
181	1	word = word[:-5]
182	1	step1b_flag = True
183	1	elif word[-4:] == 'edly':
184	1	if self._sb_has_vowel(word[:-4]):
185	1	word = word[:-4]
186	1	step1b_flag = True
187	1	elif word[-3:] == 'eed':
188	1	if len(word[r1_start:]) >= 3:
189	1	word = word[:-1]
190	1	elif word[-3:] == 'ing':
191	1	if self._sb_has_vowel(word[:-3]):
192	1	word = word[:-3]
193	1	step1b_flag = True
194	1	elif word[-2:] == 'ed':
195	1	if self._sb_has_vowel(word[:-2]):
196	1	word = word[:-2]
197	1	step1b_flag = True
198	1	elif early_english:
199	1	if word[-3:] == 'est':
200	1	if self._sb_has_vowel(word[:-3]):
201	1	word = word[:-3]
202	1	step1b_flag = True
203	1	elif word[-3:] == 'eth':
204	1	if self._sb_has_vowel(word[:-3]):
205	1	word = word[:-3]
206	1	step1b_flag = True
207
208	1	if step1b_flag:
209	1	if word[-2:] in {'at', 'bl', 'iz'}:
210	1	word += 'e'
211	1	elif word[-2:] in self._doubles:
212	1	word = word[:-1]
213	1	elif self._sb_short_word(word, self._r1_prefixes):
214	1	word += 'e'
215
216		# Step 1c
217	1	if (
218		len(word) > 2
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
219		and word[-1] in {'Y', 'y'}
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
220		and word[-2] not in self._vowels
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
221		):
222	1	word = word[:-1] + 'i'
223
224		# Step 2
225	1	if word[-2] == 'a':
226	1	if word[-7:] == 'ational':
227	1	if len(word[r1_start:]) >= 7:
228	1	word = word[:-5] + 'e'
229	1	elif word[-6:] == 'tional':
230	1	if len(word[r1_start:]) >= 6:
231	1	word = word[:-2]
232	1	elif word[-2] == 'c':
233	1	if word[-4:] in {'enci', 'anci'}:
234	1	if len(word[r1_start:]) >= 4:
235	1	word = word[:-1] + 'e'
236	1	elif word[-2] == 'e':
237	1	if word[-4:] == 'izer':
238	1	if len(word[r1_start:]) >= 4:
239	1	word = word[:-1]
240	1	elif word[-2] == 'g':
241	1	if word[-3:] == 'ogi':
242	1	if (
243		r1_start >= 1
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
244		and len(word[r1_start:]) >= 3
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
245		and word[-4] == 'l'
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
246		):
247	1	word = word[:-1]
248	1	elif word[-2] == 'l':
249	1	if word[-6:] == 'lessli':
250	1	if len(word[r1_start:]) >= 6:
251	1	word = word[:-2]
252	1	elif word[-5:] in {'entli', 'fulli', 'ousli'}:
253	1	if len(word[r1_start:]) >= 5:
254	1	word = word[:-2]
255	1	elif word[-4:] == 'abli':
256	1	if len(word[r1_start:]) >= 4:
257	1	word = word[:-1] + 'e'
258	1	elif word[-4:] == 'alli':
259	1	if len(word[r1_start:]) >= 4:
260	1	word = word[:-2]
261	1	elif word[-3:] == 'bli':
262	1	if len(word[r1_start:]) >= 3:
263	1	word = word[:-1] + 'e'
264	1	elif word[-2:] == 'li':
265	1	if (
266		r1_start >= 1
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
267		and len(word[r1_start:]) >= 2
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
268		and word[-3] in self._li
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
269		):
270	1	word = word[:-2]
271	1	elif word[-2] == 'o':
272	1	if word[-7:] == 'ization':
273	1	if len(word[r1_start:]) >= 7:
274	1	word = word[:-5] + 'e'
275	1	elif word[-5:] == 'ation':
276	1	if len(word[r1_start:]) >= 5:
277	1	word = word[:-3] + 'e'
278	1	elif word[-4:] == 'ator':
279	1	if len(word[r1_start:]) >= 4:
280	1	word = word[:-2] + 'e'
281	1	elif word[-2] == 's':
282	1	if word[-7:] in {'fulness', 'ousness', 'iveness'}:
283	1	if len(word[r1_start:]) >= 7:
284	1	word = word[:-4]
285	1	elif word[-5:] == 'alism':
286	1	if len(word[r1_start:]) >= 5:
287	1	word = word[:-3]
288	1	elif word[-2] == 't':
289	1	if word[-6:] == 'biliti':
290	1	if len(word[r1_start:]) >= 6:
291	1	word = word[:-5] + 'le'
292	1	elif word[-5:] == 'aliti':
293	1	if len(word[r1_start:]) >= 5:
294	1	word = word[:-3]
295	1	elif word[-5:] == 'iviti':
296	1	if len(word[r1_start:]) >= 5:
297	1	word = word[:-3] + 'e'
298
299		# Step 3
300	1	if word[-7:] == 'ational':
301	1	if len(word[r1_start:]) >= 7:
302	1	word = word[:-5] + 'e'
303	1	elif word[-6:] == 'tional':
304	1	if len(word[r1_start:]) >= 6:
305	1	word = word[:-2]
306	1	elif word[-5:] in {'alize', 'icate', 'iciti'}:
307	1	if len(word[r1_start:]) >= 5:
308	1	word = word[:-3]
309	1	elif word[-5:] == 'ative':
310	1	if len(word[r2_start:]) >= 5:
311	1	word = word[:-5]
312	1	elif word[-4:] == 'ical':
313	1	if len(word[r1_start:]) >= 4:
314	1	word = word[:-2]
315	1	elif word[-4:] == 'ness':
316	1	if len(word[r1_start:]) >= 4:
317	1	word = word[:-4]
318	1	elif word[-3:] == 'ful':
319	1	if len(word[r1_start:]) >= 3:
320	1	word = word[:-3]
321
322		# Step 4
323	1	for suffix in (
324		'ement',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
325		'ance',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
326		'ence',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
327		'able',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
328		'ible',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
329		'ment',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
330		'ant',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
331		'ent',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
332		'ism',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
333		'ate',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
334		'iti',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
335		'ous',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
336		'ive',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
337		'ize',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
338		'al',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
339		'er',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
340		'ic',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
341		):
342	1	if word[-len(suffix) :] == suffix:
343	1	if len(word[r2_start:]) >= len(suffix):
344	1	word = word[: -len(suffix)]
345	1	break
346		else:
347	1	if word[-3:] == 'ion':
348	1	if (
349		len(word[r2_start:]) >= 3
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
350		and len(word) >= 4
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
351		and word[-4] in tuple('st')
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
352		):
353	1	word = word[:-3]
354
355		# Step 5
356	1	if word[-1] == 'e':
357	1	if len(word[r2_start:]) >= 1 or (
358		len(word[r1_start:]) >= 1
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
359		and not self._sb_ends_in_short_syllable(word[:-1])
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
360		):
361	1	word = word[:-1]
362	1	elif word[-1] == 'l':
363	1	if len(word[r2_start:]) >= 1 and word[-2] == 'l':
364	1	word = word[:-1]
365
366		# Change 'Y' back to 'y' if it survived stemming
367	1	for i in range(0, len(word)):
368	1	if word[i] == 'Y':
369	1	word = word[:i] + 'y' + word[i + 1 :]
370
371	1	return word
372
373
374	1	def porter2(word, early_english=False):
375		"""Return the Porter2 (Snowball English) stem.
376
377		This is a wrapper for :py:meth:`Porter2.stem`.
378
379		Args:
380		word (str): The word to stem
381		early_english (bool): Set to True in order to remove -eth & -est (2nd &
382		3rd person singular verbal agreement suffixes)
383
384		Returns:
385		str: Word stem
386
387		Examples:
388		>>> porter2('reading')
389		'read'
390		>>> porter2('suspension')
391		'suspens'
392		>>> porter2('elusiveness')
393		'elus'
394
395		>>> porter2('eateth', early_english=True)
396		'eat'
397
398		"""
399	1	return Porter2().stem(word, early_english)
400
401
402		if __name__ == '__main__':
403		import doctest
404
405		doctest.testmod()
406

chrislit / abydos

Pull Request — master (#141)

abydos.stemmer._Porter2.Porter2.stem() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like