abydos.stemmer._Porter - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

abydos.stemmer._Porter F

↳ Parent: Project

Complexity

Total Complexity

126

Size/Duplication

Total Lines	384
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	220
dl	0
loc	384
ccs	202
cts	202
cp	1
rs	2
c	0
b	0
f	0
wmc	126

5 Methods

Rating	Name	Size	Complexity
A	Porter._ends_in_cvc()	16	1
F	Porter.stem()	227	116
A	Porter._m_degree()	22	4
A	Porter._ends_in_doubled_cons()	15	1
A	Porter._has_vowel()	15	3

1 Function

Rating	Name	Duplication	Size	Complexity
A	porter()	0	26	1

How to fix Complexity

# -*- coding: utf-8 -*-


# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.stemmer._Porter.

Porter stemmer
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from unicodedata import normalize

from six import text_type
from six.moves import range

from ._Stemmer import _Stemmer

__all__ = ['Porter', 'porter']


class Porter(_Stemmer):
    """Porter stemmer.

    The Porter stemmer is described in :cite:`Porter:1980`.

    While a word is being stemmed, a consonantal 'y' is temporarily written
    as 'Y'. Lowercase 'y' is a member of ``_vowels`` and so counts as a vowel,
    whereas 'Y' is not and so counts as a consonant. Any 'Y' that survives
    stemming is converted back to 'y' before the stem is returned.
    """

    _vowels = {'a', 'e', 'i', 'o', 'u', 'y'}

    # Ordered (suffix, replacement) pairs for Step 2. Only the first suffix
    # that matches is considered, so a longer suffix must precede any shorter
    # suffix that it ends with (e.g. 'ational' before 'tional').
    _step2_rules = (
        ('ational', 'ate'),
        ('tional', 'tion'),
        ('enci', 'ence'),
        ('anci', 'ance'),
        ('izer', 'ize'),
        ('logi', 'log'),
        ('bli', 'ble'),
        ('alli', 'al'),
        ('entli', 'ent'),
        ('eli', 'e'),
        ('ousli', 'ous'),
        ('ization', 'ize'),
        ('ation', 'ate'),
        ('ator', 'ate'),
        ('alism', 'al'),
        ('iveness', 'ive'),
        ('fulness', 'ful'),
        ('ousness', 'ous'),
        ('aliti', 'al'),
        ('iviti', 'ive'),
        ('biliti', 'ble'),
    )

    # Ordered (suffix, replacement) pairs for Step 3.
    _step3_rules = (
        ('icate', 'ic'),
        ('ative', ''),
        ('alize', 'al'),
        ('iciti', 'ic'),
        ('ical', 'ic'),
        ('ful', ''),
        ('ness', ''),
    )

    # Ordered (suffix, replacement) pairs for Step 4; every suffix is simply
    # removed. The order 'ement', 'ment', 'ent' matters because only the
    # first matching suffix is tried. The -sion/-tion rule is handled
    # separately in _step4 because it strips only 'ion'.
    _step4_rules = (
        ('al', ''),
        ('ance', ''),
        ('ence', ''),
        ('er', ''),
        ('ic', ''),
        ('able', ''),
        ('ible', ''),
        ('ant', ''),
        ('ement', ''),
        ('ment', ''),
        ('ent', ''),
        ('ou', ''),
        ('ism', ''),
        ('ate', ''),
        ('iti', ''),
        ('ous', ''),
        ('ive', ''),
        ('ize', ''),
    )

    def _m_degree(self, term):
        """Return Porter helper function _m_degree value.

        m-degree is equal to the number of V to C transitions

        Args:
            term (str): The word for which to calculate the m-degree

        Returns:
            int: The m-degree as defined in the Porter stemmer definition

        """
        mdeg = 0
        last_was_vowel = False
        for letter in term:
            if letter in self._vowels:
                last_was_vowel = True
            else:
                # A consonant directly after a vowel closes one VC sequence.
                if last_was_vowel:
                    mdeg += 1
                last_was_vowel = False
        return mdeg

    def _has_vowel(self, term):
        """Return Porter helper function _has_vowel value.

        Args:
            term (str): The word to scan for vowels

        Returns:
            bool: True iff a vowel exists in the term (as defined in the Porter
                stemmer definition)

        """
        return any(letter in self._vowels for letter in term)

    def _ends_in_doubled_cons(self, term):
        """Return Porter helper function _ends_in_doubled_cons value.

        Args:
            term (str): The word to check for a final doubled consonant

        Returns:
            bool: True iff the stem ends in a doubled consonant (as defined in
                the Porter stemmer definition)

        """
        return (
            len(term) > 1
            and term[-1] not in self._vowels
            and term[-2] == term[-1]
        )

    def _ends_in_cvc(self, term):
        """Return Porter helper function _ends_in_cvc value.

        Args:
            term (str): The word to scan for cvc

        Returns:
            bool: True iff the stem ends in cvc (as defined in the Porter
                stemmer definition), where the final consonant is not one of
                'w', 'x', or 'Y'

        """
        return len(term) > 2 and (
            term[-1] not in self._vowels
            and term[-2] in self._vowels
            and term[-3] not in self._vowels
            and term[-1] not in tuple('wxY')
        )

    def _apply_rules(self, word, rules, min_m_degree):
        """Apply the first suffix rule whose suffix matches the word.

        Only the first matching rule is considered: if its m-degree condition
        fails, the word is returned unchanged and no later rule is tried.

        Args:
            word (str): The word to transform
            rules (tuple): Ordered (suffix, replacement) pairs
            min_m_degree (int): The m-degree of the stem (the word without
                the suffix) must be greater than this value for the
                replacement to take place

        Returns:
            str: The transformed word, or the word unchanged

        """
        for suffix, replacement in rules:
            if word.endswith(suffix):
                stem = word[: -len(suffix)]
                if self._m_degree(stem) > min_m_degree:
                    return stem + replacement
                return word
        return word

    def _mark_consonantal_y(self, word):
        """Re-map consonantal y to Y (Y will be C, y will be V).

        A 'y' is consonantal when it starts the word or directly follows a
        vowel. The scan runs left to right and consults the already re-mapped
        previous letter, so a 'y' that follows a 'Y' remains a vowel.

        Args:
            word (str): The non-empty, lowercased word

        Returns:
            str: The word with each consonantal 'y' replaced by 'Y'

        """
        letters = list(word)
        if letters[0] == 'y':
            letters[0] = 'Y'
        for i in range(1, len(letters)):
            if letters[i] == 'y' and letters[i - 1] in self._vowels:
                letters[i] = 'Y'
        return ''.join(letters)

    def _step1a(self, word):
        """Apply Step 1a: reduce plural -s endings.

        Args:
            word (str): The word to transform

        Returns:
            str: The word with -sses/-ies shortened by two letters, a lone
                final -s removed, and a final -ss left untouched

        """
        if word.endswith(('sses', 'ies')):
            return word[:-2]
        if word.endswith('s') and not word.endswith('ss'):
            return word[:-1]
        return word

    def _step1b(self, word, early_english):
        """Apply Step 1b: handle -eed, -ed, and -ing endings.

        Args:
            word (str): The word to transform
            early_english (bool): If True, -est and -eth are also removed

        Returns:
            str: The transformed word, or the word unchanged

        """
        if word.endswith('eed'):
            if self._m_degree(word[:-3]) > 0:
                return word[:-1]
            return word

        suffixes = ('ed', 'ing')
        if early_english:
            suffixes += ('est', 'eth')
        for suffix in suffixes:
            if word.endswith(suffix):
                stem = word[: -len(suffix)]
                # The suffix is only removed if the remaining stem has a vowel.
                if self._has_vowel(stem):
                    return self._step1b_cleanup(stem)
                return word
        return word

    def _step1b_cleanup(self, word):
        """Tidy a stem from which Step 1b has just removed a suffix.

        Args:
            word (str): The stem left after suffix removal

        Returns:
            str: The stem with a final 'e' restored after -at/-bl/-iz, a
                doubled final consonant (other than l, s, or z) reduced to a
                single letter, or an 'e' appended to a stem of m-degree 1 that
                ends in cvc; otherwise the stem unchanged

        """
        if word.endswith(('at', 'bl', 'iz')):
            return word + 'e'
        if self._ends_in_doubled_cons(word) and word[-1] not in {
            'l',
            's',
            'z',
        }:
            return word[:-1]
        if self._m_degree(word) == 1 and self._ends_in_cvc(word):
            return word + 'e'
        return word

    def _step1c(self, word):
        """Apply Step 1c: turn a final y/Y into i if the stem has a vowel.

        Args:
            word (str): The word to transform

        Returns:
            str: The transformed word, or the word unchanged

        """
        if word.endswith(('Y', 'y')) and self._has_vowel(word[:-1]):
            return word[:-1] + 'i'
        return word

    def _step4(self, word):
        """Apply Step 4: remove a suffix if the stem's m-degree exceeds 1.

        Args:
            word (str): The word to transform

        Returns:
            str: The transformed word, or the word unchanged

        """
        # For -sion/-tion only 'ion' is removed, and the m-degree is measured
        # on the stem including its final 's' or 't'. No word ending in 'ion'
        # can match another Step 4 suffix, so testing it first is safe.
        if word.endswith(('sion', 'tion')):
            stem = word[:-3]
            if self._m_degree(stem) > 1:
                return stem
            return word
        return self._apply_rules(word, self._step4_rules, 1)

    def _step5(self, word):
        """Apply Step 5: remove a final -e and reduce a final -ll.

        Args:
            word (str): The word to transform

        Returns:
            str: The transformed word, or the word unchanged

        """
        # Step 5a
        if word.endswith('e'):
            stem = word[:-1]
            mdeg = self._m_degree(stem)
            if mdeg > 1 or (mdeg == 1 and not self._ends_in_cvc(stem)):
                word = stem

        # Step 5b
        if word.endswith('ll') and self._m_degree(word) > 1:
            word = word[:-1]
        return word

    def stem(self, word, early_english=False):
        """Return Porter stem.

        Args:
            word (str): The word to stem
            early_english (bool): Set to True in order to remove -eth & -est
                (2nd & 3rd person singular verbal agreement suffixes)

        Returns:
            str: Word stem

        Examples:
            >>> stmr = Porter()
            >>> stmr.stem('reading')
            'read'
            >>> stmr.stem('suspension')
            'suspens'
            >>> stmr.stem('elusiveness')
            'elus'

            >>> stmr.stem('eateth', early_english=True)
            'eat'

        """
        # lowercase, normalize, and compose
        word = normalize('NFC', text_type(word.lower()))

        # Words of fewer than three letters are returned unchanged
        if len(word) < 3:
            return word

        word = self._mark_consonantal_y(word)

        word = self._step1a(word)
        word = self._step1b(word, early_english)
        word = self._step1c(word)
        word = self._apply_rules(word, self._step2_rules, 0)
        word = self._apply_rules(word, self._step3_rules, 0)
        word = self._step4(word)
        word = self._step5(word)

        # Change 'Y' back to 'y' if it survived stemming
        return word.replace('Y', 'y')


def porter(word, early_english=False):
    """Return the Porter stem of a word.

    Convenience wrapper that builds a fresh :py:class:`Porter` instance and
    delegates to :py:meth:`Porter.stem`.

    Args:
        word (str): The word to stem
        early_english (bool): Set to True in order to remove -eth & -est
            (2nd & 3rd person singular verbal agreement suffixes)

    Returns:
        str: Word stem

    Examples:
        >>> porter('reading')
        'read'
        >>> porter('suspension')
        'suspens'
        >>> porter('elusiveness')
        'elus'

        >>> porter('eateth', early_english=True)
        'eat'

    """
    stemmer = Porter()
    return stemmer.stem(word, early_english=early_english)


# Run this module's doctests when it is executed directly as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
		0 ignored issues – show Coding Style Naming introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The name `_Porter` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)\|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.stemmer._Porter.
20
21		Porter stemmer
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from unicodedata import normalize
32
33	1	from six import text_type
34	1	from six.moves import range
35
36	1	from ._Stemmer import _Stemmer
37
38	1	__all__ = ['Porter', 'porter']
39
40
41	1	class Porter(_Stemmer):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
42		"""Porter stemmer.
43
44		The Porter stemmer is described in :cite:`Porter:1980`.
45		"""
46
47	1	_vowels = {'a', 'e', 'i', 'o', 'u', 'y'}
48
49	1	def _m_degree(self, term):
50		"""Return Porter helper function _m_degree value.
51
52		m-degree is equal to the number of V to C transitions
53
54		Args:
55		term (str): The word for which to calculate the m-degree
56
57		Returns:
58		int: The m-degree as defined in the Porter stemmer definition
59
60		"""
61	1	mdeg = 0
62	1	last_was_vowel = False
63	1	for letter in term:
64	1	if letter in self._vowels:
65	1	last_was_vowel = True
66		else:
67	1	if last_was_vowel:
68	1	mdeg += 1
69	1	last_was_vowel = False
70	1	return mdeg
71
72	1	def _has_vowel(self, term):
73		"""Return Porter helper function _has_vowel value.
74
75		Args:
76		term (str): The word to scan for vowels
77
78		Returns:
79		bool: True iff a vowel exists in the term (as defined in the Porter
80		stemmer definition)
81
82		"""
83	1	for letter in term:
84	1	if letter in self._vowels:
85	1	return True
86	1	return False
87
88	1	def _ends_in_doubled_cons(self, term):
89		"""Return Porter helper function _ends_in_doubled_cons value.
90
91		Args:
92		term (str): The word to check for a final doubled consonant
93
94		Returns:
95		bool: True iff the stem ends in a doubled consonant (as defined in
96		the Porter stemmer definition)
97
98		"""
99	1	return (
100		len(term) > 1
101		and term[-1] not in self._vowels
102		and term[-2] == term[-1]
103		)
104
105	1	def _ends_in_cvc(self, term):
106		"""Return Porter helper function _ends_in_cvc value.
107
108		Args:
109		term (str): The word to scan for cvc
110
111		Returns:
112		bool: True iff the stem ends in cvc (as defined in the Porter
113		stemmer definition)
114
115		"""
116	1	return len(term) > 2 and (
117		term[-1] not in self._vowels
118		and term[-2] in self._vowels
119		and term[-3] not in self._vowels
120		and term[-1] not in tuple('wxY')
121		)
122
123	1	def stem(self, word, early_english=False):
		0 ignored issues – show Bug introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'stem' method Loading history...
124		"""Return Porter stem.
125
126		Args:
127		word (str): The word to stem
128		early_english (bool): Set to True in order to remove -eth & -est
129		(2nd & 3rd person singular verbal agreement suffixes)
130
131		Returns:
132		str: Word stem
133
134		Examples:
135		>>> stmr = Porter()
136		>>> stmr.stem('reading')
137		'read'
138		>>> stmr.stem('suspension')
139		'suspens'
140		>>> stmr.stem('elusiveness')
141		'elus'
142
143		>>> stmr.stem('eateth', early_english=True)
144		'eat'
145
146		"""
147		# lowercase, normalize, and compose
148	1	word = normalize('NFC', text_type(word.lower()))
149
150		# Return word if stem is shorter than 2
151	1	if len(word) < 3:
152	1	return word
153
154		# Re-map consonantal y to Y (Y will be C, y will be V)
155	1	if word[0] == 'y':
156	1	word = 'Y' + word[1:]
157	1	for i in range(1, len(word)):
158	1	if word[i] == 'y' and word[i - 1] in self._vowels:
159	1	word = word[:i] + 'Y' + word[i + 1 :]
160
161		# Step 1a
162	1	if word[-1] == 's':
163	1	if word[-4:] == 'sses':
164	1	word = word[:-2]
165	1	elif word[-3:] == 'ies':
166	1	word = word[:-2]
167	1	elif word[-2:] == 'ss':
168	1	pass
169		else:
170	1	word = word[:-1]
171
172		# Step 1b
173	1	step1b_flag = False
174	1	if word[-3:] == 'eed':
175	1	if self._m_degree(word[:-3]) > 0:
176	1	word = word[:-1]
177	1	elif word[-2:] == 'ed':
178	1	if self._has_vowel(word[:-2]):
179	1	word = word[:-2]
180	1	step1b_flag = True
181	1	elif word[-3:] == 'ing':
182	1	if self._has_vowel(word[:-3]):
183	1	word = word[:-3]
184	1	step1b_flag = True
185	1	elif early_english:
186	1	if word[-3:] == 'est':
187	1	if self._has_vowel(word[:-3]):
188	1	word = word[:-3]
189	1	step1b_flag = True
190	1	elif word[-3:] == 'eth':
191	1	if self._has_vowel(word[:-3]):
192	1	word = word[:-3]
193	1	step1b_flag = True
194
195	1	if step1b_flag:
196	1	if word[-2:] in {'at', 'bl', 'iz'}:
197	1	word += 'e'
198	1	elif self._ends_in_doubled_cons(word) and word[-1] not in {
199		'l',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
200		's',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
201		'z',
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
202		}:
203	1	word = word[:-1]
204	1	elif self._m_degree(word) == 1 and self._ends_in_cvc(word):
205	1	word += 'e'
206
207		# Step 1c
208	1	if word[-1] in {'Y', 'y'} and self._has_vowel(word[:-1]):
209	1	word = word[:-1] + 'i'
210
211		# Step 2
212	1	if len(word) > 1:
213	1	if word[-2] == 'a':
214	1	if word[-7:] == 'ational':
215	1	if self._m_degree(word[:-7]) > 0:
216	1	word = word[:-5] + 'e'
217	1	elif word[-6:] == 'tional':
218	1	if self._m_degree(word[:-6]) > 0:
219	1	word = word[:-2]
220	1	elif word[-2] == 'c':
221	1	if word[-4:] in {'enci', 'anci'}:
222	1	if self._m_degree(word[:-4]) > 0:
223	1	word = word[:-1] + 'e'
224	1	elif word[-2] == 'e':
225	1	if word[-4:] == 'izer':
226	1	if self._m_degree(word[:-4]) > 0:
227	1	word = word[:-1]
228	1	elif word[-2] == 'g':
229	1	if word[-4:] == 'logi':
230	1	if self._m_degree(word[:-4]) > 0:
231	1	word = word[:-1]
232	1	elif word[-2] == 'l':
233	1	if word[-3:] == 'bli':
234	1	if self._m_degree(word[:-3]) > 0:
235	1	word = word[:-1] + 'e'
236	1	elif word[-4:] == 'alli':
237	1	if self._m_degree(word[:-4]) > 0:
238	1	word = word[:-2]
239	1	elif word[-5:] == 'entli':
240	1	if self._m_degree(word[:-5]) > 0:
241	1	word = word[:-2]
242	1	elif word[-3:] == 'eli':
243	1	if self._m_degree(word[:-3]) > 0:
244	1	word = word[:-2]
245	1	elif word[-5:] == 'ousli':
246	1	if self._m_degree(word[:-5]) > 0:
247	1	word = word[:-2]
248	1	elif word[-2] == 'o':
249	1	if word[-7:] == 'ization':
250	1	if self._m_degree(word[:-7]) > 0:
251	1	word = word[:-5] + 'e'
252	1	elif word[-5:] == 'ation':
253	1	if self._m_degree(word[:-5]) > 0:
254	1	word = word[:-3] + 'e'
255	1	elif word[-4:] == 'ator':
256	1	if self._m_degree(word[:-4]) > 0:
257	1	word = word[:-2] + 'e'
258	1	elif word[-2] == 's':
259	1	if word[-5:] == 'alism':
260	1	if self._m_degree(word[:-5]) > 0:
261	1	word = word[:-3]
262	1	elif word[-7:] in {'iveness', 'fulness', 'ousness'}:
263	1	if self._m_degree(word[:-7]) > 0:
264	1	word = word[:-4]
265	1	elif word[-2] == 't':
266	1	if word[-5:] == 'aliti':
267	1	if self._m_degree(word[:-5]) > 0:
268	1	word = word[:-3]
269	1	elif word[-5:] == 'iviti':
270	1	if self._m_degree(word[:-5]) > 0:
271	1	word = word[:-3] + 'e'
272	1	elif word[-6:] == 'biliti':
273	1	if self._m_degree(word[:-6]) > 0:
274	1	word = word[:-5] + 'le'
275
276		# Step 3
277	1	if word[-5:] in 'icate':
278	1	if self._m_degree(word[:-5]) > 0:
279	1	word = word[:-3]
280	1	elif word[-5:] == 'ative':
281	1	if self._m_degree(word[:-5]) > 0:
282	1	word = word[:-5]
283	1	elif word[-5:] in {'alize', 'iciti'}:
284	1	if self._m_degree(word[:-5]) > 0:
285	1	word = word[:-3]
286	1	elif word[-4:] == 'ical':
287	1	if self._m_degree(word[:-4]) > 0:
288	1	word = word[:-2]
289	1	elif word[-3:] == 'ful':
290	1	if self._m_degree(word[:-3]) > 0:
291	1	word = word[:-3]
292	1	elif word[-4:] == 'ness':
293	1	if self._m_degree(word[:-4]) > 0:
294	1	word = word[:-4]
295
296		# Step 4
297	1	if word[-2:] == 'al':
298	1	if self._m_degree(word[:-2]) > 1:
299	1	word = word[:-2]
300	1	elif word[-4:] in {'ance', 'ence'}:
301	1	if self._m_degree(word[:-4]) > 1:
302	1	word = word[:-4]
303	1	elif word[-2:] in {'er', 'ic'}:
304	1	if self._m_degree(word[:-2]) > 1:
305	1	word = word[:-2]
306	1	elif word[-4:] in {'able', 'ible'}:
307	1	if self._m_degree(word[:-4]) > 1:
308	1	word = word[:-4]
309	1	elif word[-3:] == 'ant':
310	1	if self._m_degree(word[:-3]) > 1:
311	1	word = word[:-3]
312	1	elif word[-5:] == 'ement':
313	1	if self._m_degree(word[:-5]) > 1:
314	1	word = word[:-5]
315	1	elif word[-4:] == 'ment':
316	1	if self._m_degree(word[:-4]) > 1:
317	1	word = word[:-4]
318	1	elif word[-3:] == 'ent':
319	1	if self._m_degree(word[:-3]) > 1:
320	1	word = word[:-3]
321	1	elif word[-4:] in {'sion', 'tion'}:
322	1	if self._m_degree(word[:-3]) > 1:
323	1	word = word[:-3]
324	1	elif word[-2:] == 'ou':
325	1	if self._m_degree(word[:-2]) > 1:
326	1	word = word[:-2]
327	1	elif word[-3:] in {'ism', 'ate', 'iti', 'ous', 'ive', 'ize'}:
328	1	if self._m_degree(word[:-3]) > 1:
329	1	word = word[:-3]
330
331		# Step 5a
332	1	if word[-1] == 'e':
333	1	if self._m_degree(word[:-1]) > 1:
334	1	word = word[:-1]
335	1	elif self._m_degree(word[:-1]) == 1 and not self._ends_in_cvc(
336		word[:-1]
		0 ignored issues – show Coding Style introduced 2018-11-05 04:21 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
337		):
338	1	word = word[:-1]
339
340		# Step 5b
341	1	if word[-2:] == 'll' and self._m_degree(word) > 1:
342	1	word = word[:-1]
343
344		# Change 'Y' back to 'y' if it survived stemming
345	1	for i in range(len(word)):
		0 ignored issues – show unused-code introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report Consider using enumerate instead of iterating with range and len Loading history...
346	1	if word[i] == 'Y':
347	1	word = word[:i] + 'y' + word[i + 1 :]
348
349	1	return word
350
351
352	1	def porter(word, early_english=False):
353		"""Return Porter stem.
354
355		This is a wrapper for :py:meth:`Porter.stem`.
356
357		Args:
358		word (str): The word to stem
359		early_english (bool): Set to True in order to remove -eth & -est
360		(2nd & 3rd person singular verbal agreement suffixes)
361
362		Returns:
363		str: Word stem
364
365		Examples:
366		>>> porter('reading')
367		'read'
368		>>> porter('suspension')
369		'suspens'
370		>>> porter('elusiveness')
371		'elus'
372
373		>>> porter('eateth', early_english=True)
374		'eat'
375
376		"""
377	1	return Porter().stem(word, early_english)
378
379
380		if __name__ == '__main__':
381		import doctest
382
383		doctest.testmod()
384

chrislit / abydos

Pull Request — master (#141)

abydos.stemmer._Porter F

Complexity

Size/Duplication

Test Coverage

Importance

5 Methods

1 Function

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like