Completed
Pull Request — master (#141)
by Chris
11:04
created

abydos.stemmer._snowball._Snowball._sb_has_vowel()   A

Complexity

Conditions 3

Size

Total Lines 15
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 5
nop 2
dl 0
loc 15
ccs 5
cts 5
cp 1
crap 3
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.stemmer._snowball.
20
21
Snowball Stemmer base class
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from six.moves import range
32
33 1
from ._stemmer import _Stemmer
34
35
36 1
class _Snowball(_Stemmer):
    """Snowball stemmer base class.

    Provides the region (R1/R2), short-syllable, short-word, and vowel
    helpers shared by the Snowball-family stemmers. Subclasses may override
    ``_vowels`` and ``_codanonvowels`` to suit their language.
    """

    # Letters treated as vowels by the helper methods below.
    _vowels = set('aeiouy')
    # Non-vowels allowed to close a short syllable (note: no 'w' or 'x').
    _codanonvowels = set('\'bcdfghjklmnpqrstvz')

    def _sb_r1(self, term, r1_prefixes=None):
        """Return the start of the R1 region, per the Porter2 specification.

        R1 is the part of the term after the first non-vowel that follows a
        vowel. If ``r1_prefixes`` is iterable and the term begins with one of
        its members, R1 instead starts immediately after that prefix (the
        first matching prefix in iteration order wins).

        Args:
            term (str): The term to examine
            r1_prefixes (set): Prefixes to consider

        Returns:
            int: Index into term at which R1 begins; ``len(term)`` when the
                R1 region is empty

        """
        if hasattr(r1_prefixes, '__iter__'):
            for prefix in r1_prefixes:
                if term[: len(prefix)] == prefix:
                    return len(prefix)

        vowel_found = False
        for i, letter in enumerate(term):
            if letter in self._vowels:
                vowel_found = True
            elif vowel_found:
                # First non-vowel after a vowel: R1 starts just past it.
                return i + 1
        return len(term)

    def _sb_r2(self, term, r1_prefixes=None):
        """Return the start of the R2 region, per the Porter2 specification.

        R2 is found by applying the R1 rule again to the R1 region itself.
        The prefixes are only considered when locating R1, not for the
        second pass.

        Args:
            term (str): The term to examine
            r1_prefixes (set): Prefixes to consider

        Returns:
            int: Index into term at which R2 begins; ``len(term)`` when the
                R2 region is empty

        """
        r1_start = self._sb_r1(term, r1_prefixes)
        return r1_start + self._sb_r1(term[r1_start:])

    def _sb_ends_in_short_syllable(self, term):
        """Return True iff term ends in a short syllable.

        (...according to the Porter2 specification.)

        NB: This is akin to the CVC test from the Porter stemmer. The
        description is unfortunately poor/ambiguous.

        A two-letter term qualifies when it is a vowel followed by a
        non-vowel. A term of three or more letters qualifies when its last
        three letters are non-vowel, vowel, and a member of
        ``_codanonvowels``. Shorter terms never qualify.

        Args:
            term (str): The term to examine

        Returns:
            bool: True iff term ends in a short syllable

        """
        if len(term) == 2:
            return term[-2] in self._vowels and term[-1] not in self._vowels
        if len(term) >= 3:
            return (
                term[-3] not in self._vowels
                and term[-2] in self._vowels
                and term[-1] in self._codanonvowels
            )
        # Empty and single-letter terms cannot end in a short syllable.
        return False

    def _sb_short_word(self, term, r1_prefixes=None):
        """Return True iff term is a short word.

        (...according to the Porter2 specification.)

        A term is a short word when its R1 region is empty (R1 starts at the
        end of the term) and it ends in a short syllable.

        Args:
            term (str): The term to examine
            r1_prefixes (set): Prefixes to consider

        Returns:
            bool: True iff term is a short word

        """
        return self._sb_r1(term, r1_prefixes) == len(
            term
        ) and self._sb_ends_in_short_syllable(term)

    def _sb_has_vowel(self, term):
        """Return True iff the term contains a vowel.

        Args:
            term (str): The term to examine

        Returns:
            bool: True iff a vowel exists in the term (as defined in the Porter
                stemmer definition)

        """
        return any(letter in self._vowels for letter in term)
143
144
145
if __name__ == '__main__':
    # Run this module's doctests when it is executed as a script.
    from doctest import testmod

    testmod()
149