score_length()   F
last analyzed

Complexity

Conditions 11

Size

Total Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 1
Metric Value
cc 11
c 3
b 0
f 1
dl 0
loc 28
rs 3.1764

How to fix   Complexity   

Complexity

Complex classes like score_length() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""Provides various scoring methods for word strength."""
2
3
import re
4
5
import fuzzy
0 ignored issues
show
Configuration introduced by
The import fuzzy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a Pylint configuration issue. Make sure that your libraries are available by adding the necessary install commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run Pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
6
7
# Module-level double metaphone encoder; score_dmetaphone() calls it once
# per word and unpacks the two values it returns.
dmeta = fuzzy.DMetaphone()
8
# Module-level Soundex encoder; score_soundex() calls it once per word.
# NOTE(review): the argument 4 is presumably the length of the generated
# code -- confirm against the fuzzy documentation.
soundex = fuzzy.Soundex(4)
9
10
11
def score_dmetaphone(words):
    """Score words using the double metaphone algorithm.

    Each entry has the form ``word:first:second``, where ``first`` and
    ``second`` are the two values returned by the module-level ``dmeta``
    encoder for that word (presumably the primary and secondary
    encodings -- confirm against the fuzzy documentation).

    :param words (list): the list of words.
    :rtype scores (list): the scored words
    """
    def _encode(word):
        # The encoder must yield exactly two values; anything else raises.
        first, second = dmeta(word)
        return '{0}:{1}:{2}'.format(word, first, second)

    return [_encode(word) for word in words]
22
23
24
def score_soundex(words):
    """Score words using the soundex algorithm.

    Each entry pairs the lower-cased word with the code produced by the
    module-level ``soundex`` encoder for the word as given.

    :param words (list): the list of words.
    :rtype scores (list): the scored words
    """
    template = '{}: {}'
    scores = []
    for word in words:
        scores.append(template.format(word.lower(), soundex(word)))
    return scores
31
32
33
def score_nysiis(words):
    """Score words using the nysiis algorithm.

    Each entry pairs the lower-cased word with the result of
    ``fuzzy.nysiis`` for the word as given.

    :param words (list): the list of words.
    :rtype scores (list): the scored words
    """
    def _entry(word):
        return '{}: {}'.format(word.lower(), fuzzy.nysiis(word))

    return [_entry(word) for word in words]
40
41
42
def score_length(word):
    """Return a score, 0-5, for the length of the word.

    Really long, or really short words get a lower score.
    There is no hard science behind the bands; they are a rule of thumb:

    ========  =====
    length    score
    ========  =====
    empty     0
    1-4       3
    5-9       5
    10-15     4
    16-20     2
    21+       1
    ========  =====

    :param word (str): The word to score.
    :rtype score (int): The resulting score (0 when there is no word).
    """
    if not word:
        return 0
    length = len(word)
    # The checks are ordered by ascending length, so each one only needs
    # an upper bound: the lower bound is implied by the checks before it.
    if length <= 4:
        return 3
    if length < 10:
        return 5
    if length <= 15:
        return 4
    if length <= 20:
        return 2
    return 1
70
71
72
def bounded(num, start, end):
    """Determine if a number is within the bounds of `start` and `end`.

    Both bounds are inclusive, so ``bounded(1, 1, 10)`` and
    ``bounded(10, 1, 10)`` are both true.

    :param num (int): An integer.
    :param start (int): A start minimum.
    :param end (int): An end maximum.
    :rtype is_bounded (bool): Whether number is bounded by start and end.
    """
    return start <= num <= end
81
82
83
# Characters that are neither ASCII letters nor digits are dropped before
# counting.
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9]')
# Vowels are matched case-insensitively so 'A' counts the same as 'a'.
_VOWEL_RE = re.compile(r'[aeiou]', re.IGNORECASE)
# (low, high, score) bands, checked from the outside in: a ratio at or
# below `low`, or at or above `high`, gets `score`.
_RATIO_BANDS = (
    (0.1, 0.9, 1),
    (0.2, 0.8, 2),
    (0.3, 0.7, 3),
    (0.4, 0.6, 4),
)


def score_pronounceability(word):
    """Get the ratio of vowels to consonants, a very basic measurement.

    The ratio is always the smaller count divided by the larger one, so
    it lies between 0 and 1. Half vowels and half consonants indicates a
    highly pronounceable word and gives a ratio of exactly 1.0.

    The 1-5 scale translation:

    0.0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   1.0
    0      1     2     3     4     5     4     3     2     1      5

    A ratio that falls exactly on a band edge takes the lower score.

    :param word (string): The name
    :rtype (int): The final pronounceability score; 0 when the word is
        empty or contains no vowels or no consonants.
    """
    if not word:
        return 0
    cleaned = _NON_ALNUM_RE.sub('', word)
    vowels = len(_VOWEL_RE.findall(cleaned))
    # Every remaining character that is not a vowel (digits included)
    # is counted on the consonant side.
    consonants = len(cleaned) - vowels
    # Compare by value: this also covers input with no letters or digits
    # at all, which would otherwise divide zero by zero.
    if vowels == 0 or consonants == 0:
        return 0
    ratio = float(min(vowels, consonants)) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    for low, high, score in _RATIO_BANDS:
        if ratio <= low or ratio >= high:
            return score
    # Only ratios strictly between 0.4 and 0.6 reach this point.
    return 5
125
126
127
def score_simplicity(word):
    """Determine how simple the word is.

    Simple is defined as the number of separate words, where a word is
    an unbroken run of ASCII letters in either case. Digits, spaces and
    punctuation act as separators. Fewer words is simpler, and a higher
    score is better.

    :param word (string): the name
    :rtype score (int): the final simplicity score; 0 when the name is
        empty or contains no letters at all.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    # Counting letter runs (rather than splitting on separators) keeps
    # leading, trailing or repeated separators from adding phantom words.
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    # 1 word -> 5, 2 -> 4, 3 -> 3, 4 -> 2; five or more words bottom out
    # at the very poor score of 1.
    return max(6 - word_count, 1)
154
155
156
def score_name_overall(word):
    """Score the name using separate scoring functions, then scale to 100.

    The length, pronounceability and simplicity scores are added up and
    multiplied by 10; anything above 100 is cut off at 100.
    The closer to 100%, the better.

    NOTE(review): a combined sub-score of 10 or more already reaches the
    100 cap -- confirm that this saturation is intended.

    :param word (string): the name
    :rtype score (float): the final name score
    """
    parts = (
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    )
    scaled = round(sum(parts) * 10)
    # cut off at 100%
    return min(scaled, 100)
174
175
176
def score_names_overall(words):
    """Score all names.

    :param words (list): the list of words.
    :rtype words (list): a list of tuples, with the score and word.
    """
    scored = []
    for name in words:
        # The score comes first in each tuple, followed by the name.
        scored.append((score_name_overall(name), name))
    return scored
183
184
185
def generate_all_scoring(words):
    """Return all scoring methods for a set of words.

    The scorers run in the order dmetaphone, soundex, nysiis, grade,
    each over the full list of words.

    :param words (list): the list of words.
    :rtype words (dict): the scores, keyed by scoring name.
    """
    scoring = {}
    scoring['dmetaphone'] = score_dmetaphone(words)
    scoring['soundex'] = score_soundex(words)
    scoring['nysiis'] = score_nysiis(words)
    scoring['grade'] = score_names_overall(words)
    return scoring
197