Completed
Push — master ( cc4906...fdc087 )
by Chris
01:28
created

score_simplicity()   C

Complexity

Conditions 7

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 1
Metric Value
cc 7
c 2
b 0
f 1
dl 0
loc 30
rs 5.5
1
"""Provides various scoring methods for word strength."""
2
3
import re
4
5
import fuzzy
0 ignored issues
show
Configuration introduced by
The import fuzzy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a Pylint configuration issue. Make sure that your libraries are available by adding the necessary install commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run Pylint; when installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
6
7
# Encoder instances from the third-party ``fuzzy`` package, created once at
# import time so the scoring functions can reuse them on every call.
dmeta = fuzzy.DMetaphone()
# NOTE(review): 4 is passed straight to fuzzy.Soundex; presumably the length
# of the generated code -- confirm against the fuzzy documentation.
soundex = fuzzy.Soundex(4)
9
10
11
def score_dmetaphone(words):
    """Score words using the double metaphone algorithm.

    Args:
        words (list): The words to encode.

    Returns:
        scores (list): One ``'word:first:second'`` string per input word,
            in input order, where ``first`` and ``second`` are the two
            values returned by the module-level ``dmeta`` encoder.
    """
    # The single-item inner loop unpacks the encoder result into exactly two
    # names, so a result of any other length still raises ValueError.
    return [
        '{0}:{1}:{2}'.format(word, first_code, second_code)
        for word in words
        for first_code, second_code in [dmeta(word)]
    ]
25
26
27
def score_soundex(words):
    """Score words using the soundex algorithm.

    Args:
        words (list): The words to encode.

    Returns:
        scores (list): One ``'word: code'`` string per input word, in input
            order. The word is lower-cased for display; the code comes from
            passing the word, unchanged, to the module-level ``soundex``
            encoder.
    """
    return [
        '{}: {}'.format(word.lower(), soundex(word))
        for word in words
    ]
37
38
39
def score_nysiis(words):
    """Score words using the nysiis algorithm.

    Args:
        words (list): The words to encode.

    Returns:
        scores (list): One ``'word: code'`` string per input word, in input
            order. The word is lower-cased for display; the code comes from
            passing the word, unchanged, to ``fuzzy.nysiis``.
    """
    return [
        '{}: {}'.format(word.lower(), fuzzy.nysiis(word))
        for word in words
    ]
49
50
51
def score_length(word):
    """Return a score, 0-5, for the length of the word.

    Really long, or really short words get a lower score. There is no hard
    science behind the buckets; mid-length words score best:

        length      score
        ------      -----
        empty         0
        21+           1
        16-20         2
        1-4           3
        10-15         4
        5-9           5

    Args:
        word (str): The word to score.

    Returns:
        score (int): The resulting score; 0 when ``word`` is empty or None.
    """
    # Falsy input (None or '') has no length worth scoring.
    if not word:
        return 0
    length = len(word)
    # Each guard relies on the earlier ones having already returned, so the
    # lower/upper bound of every bucket is implied rather than re-tested.
    if length > 20:
        return 1
    if length > 15:
        return 2
    if length <= 4:
        return 3
    if length >= 10:
        return 4
    # Only lengths 5-9 remain.
    return 5
82
83
84
def bounded(num, start, end):
    """Determine if a number is within the bounds of `start` and `end`.

    Both bounds are inclusive.

    Args:
        num (int): The number to test.
        start (int): The inclusive minimum.
        end (int): The inclusive maximum.

    Returns:
        is_bounded (bool): Whether ``start <= num <= end``.
    """
    return start <= num <= end
96
97
98
def score_pronounceability(word):
    """Score a word by the balance between its vowels and other characters.

    Everything except ASCII letters and digits is discarded, then the
    remaining characters are counted as vowels (a, e, i, o, u, in either
    case) or non-vowels (consonants and digits). The ratio is the smaller
    count divided by the larger one, so it always lies between 0 and 1.

    The 1-5 scale translation:
    ---------------------------------------------------------------
    0.0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   1.0
    ---------------------------------------------------------------
    0      1     2     3     4     5     4     3     2     1      5
    ---------------------------------------------------------------

    A ratio that falls exactly on a boundary between two buckets takes the
    lower of the two scores (e.g. 0.2 scores 2, 0.8 scores 2).

    Args:
        word (string): The name to score.

    Returns:
        score (int): The pronounceability score, 0-5. 0 is returned for
            empty input and for words with no vowels or no non-vowels
            (including words with no letters or digits at all).
    """
    if not word:
        return 0
    # Lower-case after stripping so that upper-case vowels are counted as
    # vowels instead of falling into the non-vowel bucket.
    cleaned = re.sub(r'[^a-zA-Z0-9]', '', word).lower()
    vowels = float(sum(1 for char in cleaned if char in 'aeiou'))
    consonants = float(len(cleaned)) - vowels
    # Compare by value: an identity test against a float literal never
    # matches, which would let 0.0 / 0.0 through for inputs such as '!!!'.
    if vowels == 0.0 or consonants == 0.0:
        return 0
    ratio = min(vowels, consonants) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # From here 0 < ratio < 1. Each test assumes the previous ones failed,
    # so the buckets narrow in from both ends towards the middle.
    if ratio <= 0.1 or ratio >= 0.9:
        return 1
    if ratio <= 0.2 or ratio >= 0.8:
        return 2
    if ratio <= 0.3 or ratio >= 0.7:
        return 3
    if ratio <= 0.4 or ratio >= 0.6:
        return 4
    # Only 0.4 < ratio < 0.6 remains.
    return 5
145
146
147
def score_simplicity(word):
    """Determine how simple the word is.

    Simple is defined as the number of separate words, where a word is an
    unbroken run of ASCII letters in either case. Anything else (spaces,
    digits, punctuation) acts as a separator. Fewer words give a higher
    score: 1 word scores 5, 2 score 4, 3 score 3, 4 score 2, and 5 or more
    score 1.

    Args:
        word (string): The name to score.

    Returns:
        score (int): The simplicity score, 0-5. 0 is returned for empty
            input and for input that contains no letters.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    # Count runs of letters rather than splitting on separators: splitting
    # yields empty fragments for leading, trailing or repeated separators,
    # and a lower-case-only pattern breaks words at every capital letter.
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    # One point is lost per extra word; 5 or more words bottom out at 1.
    return max(1, 6 - word_count)
177
178
179
def score_name_overall(word):
    """Score the name using separate scoring functions, capped at 100.

    The length, pronounceability and simplicity scores are added together
    and multiplied by 10. The closer to 100, the better.

    Args:
        word (string): The name to score.

    Returns:
        score (number): The rounded overall score, never above 100.
    """
    total = (
        score_length(word)
        + score_pronounceability(word)
        + score_simplicity(word)
    )
    # NOTE: three sub-scores of up to 5 each sum to at most 15, so the
    # scaled value can reach 150; the cap makes every total of 10 or more
    # report as 100.
    return min(round(total * 10), 100)
200
201
202
def score_names_overall(words):
    """Score all names.

    Args:
        words (list): The names to score.

    Returns:
        words (list): One ``(score, word)`` tuple per input name, in input
            order, where ``score`` is the result of ``score_name_overall``.
    """
    return [(score_name_overall(name), name) for name in words]
212
213
214
def generate_all_scoring(words):
    """Return all scoring methods for a set of words.

    Args:
        words (list): The words to score.

    Returns:
        words (dict): The results keyed by scoring name: ``'dmetaphone'``,
            ``'soundex'`` and ``'nysiis'`` hold the formatted phonetic
            encodings, and ``'grade'`` holds the ``(score, word)`` tuples
            from ``score_names_overall``.
    """
    return {
        'dmetaphone': score_dmetaphone(words),
        'soundex': score_soundex(words),
        'nysiis': score_nysiis(words),
        'grade': score_names_overall(words),
    }
229