1
|
|
|
"""Provides various scoring methods for word strength.""" |
2
|
|
|
|
3
|
|
|
import re |
4
|
|
|
|
5
|
|
|
import fuzzy |
|
|
|
|
6
|
|
|
|
7
|
|
|
# Module-level phonetic encoders, created at import time and shared by the
# scoring helpers below.
# Called once per word by score_dmetaphone().
dmeta = fuzzy.DMetaphone()
# Called once per word by score_soundex().
# NOTE(review): the 4 is presumably the length of the generated code --
# confirm against the fuzzy documentation.
soundex = fuzzy.Soundex(4)
9
|
|
|
|
10
|
|
|
|
11
|
|
|
def score_dmetaphone(words):
    """Encode every word with the module-level double metaphone encoder.

    Args:
        words (list) - the list of words.

    Returns:
        scores (list) - one 'word:first:second' string per input word,
            where first/second are the two values produced by ``dmeta``.
    """
    def _describe(term):
        # ``dmeta`` is expected to yield exactly two values per word.
        first, second = dmeta(term)
        return '{0}:{1}:{2}'.format(term, first, second)

    return [_describe(term) for term in words]
25
|
|
|
|
26
|
|
|
|
27
|
|
|
def score_soundex(words):
    """Encode every word with the module-level soundex encoder.

    Args:
        words (list) - the list of words.

    Returns:
        scores (list) - one '<lowercased word>: <soundex result>' string
            per input word.
    """
    labelled = []
    for entry in words:
        # The label is lowercased; the encoder receives the word unchanged.
        labelled.append('{}: {}'.format(entry.lower(), soundex(entry)))
    return labelled
37
|
|
|
|
38
|
|
|
|
39
|
|
|
def score_nysiis(words):
    """Encode every word with ``fuzzy.nysiis``.

    Args:
        words (list) - the list of words.

    Returns:
        scores (list) - one '<lowercased word>: <nysiis result>' string
            per input word.
    """
    labelled = []
    for entry in words:
        # The label is lowercased; the encoder receives the word unchanged.
        labelled.append('{}: {}'.format(entry.lower(), fuzzy.nysiis(entry)))
    return labelled
49
|
|
|
|
50
|
|
|
|
51
|
|
|
def score_length(word):
    """Rate the length of a word on a 1-5 scale (0 for an empty word).

    Very long and very short words are rated lower. The bands are:

        21 or more characters -> 1
        16-20 characters      -> 2
        1-4 characters        -> 3
        10-15 characters      -> 4
        5-9 characters        -> 5

    Args:
        word (str): The word to score.

    Returns:
        score (int): The resulting score, or 0 when the word is empty
            or missing.
    """
    size = len(word) if word else 0
    if size == 0:
        return 0
    # Each guard only has to test the bound not already excluded above it.
    if size > 20:
        return 1
    if size > 15:
        return 2
    if size <= 4:
        return 3
    if size >= 10:
        return 4
    return 5
82
|
|
|
|
83
|
|
|
|
84
|
|
|
def bounded(num, start, end):
    """Check whether a number lies in the closed interval [start, end].

    Args:
        num (int): The number to test.
        start (int): The inclusive lower bound.
        end (int): The inclusive upper bound.

    Returns:
        is_bounded (bool): True when start <= num <= end.
    """
    return start <= num <= end
96
|
|
|
|
97
|
|
|
|
98
|
|
|
def score_pronounceability(word):
    """Score a word by its balance of vowels and consonants.

    The ratio is the smaller of the two counts divided by the larger, so
    it always lies between 0.0 and 1.0. An even split (1.0) is treated as
    perfectly pronounceable.

    Only ASCII letters and digits are counted; every other character is
    dropped first. Vowels are a, e, i, o, u in either case; every other
    remaining character (including digits) counts as a consonant.

    The 1-5 scale translation:
    ---------------------------------------------------------------
    0.0  0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1.0
    ---------------------------------------------------------------
    0    1    2    3    4    5    4    3    2    1    5
    ---------------------------------------------------------------

    Args:
        word (string) - the name

    Returns:
        score (int) - the final pronounceability score; 0 when the word
            is empty or has no vowels or no consonants.
    """
    if not word:
        return 0
    # Lowercase so that uppercase vowels are not miscounted as consonants.
    letters = re.sub(r'[^a-zA-Z0-9]', '', word).lower()
    vowels = sum(1 for char in letters if char in 'aeiou')
    consonants = len(letters) - vowels
    # Compare by value: an identity check against a literal never matches,
    # which let all-punctuation input reach a division by zero.
    if vowels == 0 or consonants == 0:
        return 0
    ratio = float(min(vowels, consonants)) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # (score, low, high): the score applies at or below `low` and at or
    # above `high`. Bands are tried from the outermost inwards.
    bands = ((1, 0.1, 0.9), (2, 0.2, 0.8), (3, 0.3, 0.7), (4, 0.4, 0.6))
    for score, low, high in bands:
        if ratio <= low or ratio >= high:
            return score
    # Whatever is left lies strictly between 0.4 and 0.6.
    return 5
145
|
|
|
|
146
|
|
|
|
147
|
|
|
def score_simplicity(word):
    """Determine how simple the word is.

    Simplicity is based on the number of separate words in the name,
    where a word is an unbroken run of ASCII letters in either case.
    Fewer words give a higher (better) score:

        1 word -> 5, 2 words -> 4, 3 words -> 3, 4 words -> 2,
        5 or more words -> 1

    Args:
        word (string) - the name

    Returns:
        score (int) - the final simplicity score; 0 when the name is
            empty or contains no letters at all.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    # Count runs of letters instead of splitting on separators: splitting
    # yields empty fragments for leading, trailing or repeated separators
    # and treats uppercase letters as word breaks.
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    # One point is lost per extra word, bottoming out at 1.
    return max(1, 6 - word_count)
177
|
|
|
|
178
|
|
|
|
179
|
|
|
def score_name_overall(word):
    """Combine the length, pronounceability and simplicity scores.

    The three individual scores are added together, multiplied by ten
    and rounded; anything above 100 is clipped to 100. The closer the
    result is to 100, the better the name.

    Args:
        word (string) - the name

    Returns:
        score (float) - the final name score, at most 100
    """
    parts = (
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    )
    scaled = round(sum(parts) * 10)
    # cut off at 100%
    return 100 if scaled > 100 else scaled
200
|
|
|
|
201
|
|
|
|
202
|
|
|
def score_names_overall(words):
    """Compute the overall score for each name in a list.

    Args:
        words (list) - the list of words.

    Returns:
        words (list) - a list of (score, word) tuples, in input order.
    """
    graded = []
    for name in words:
        graded.append((score_name_overall(name), name))
    return graded
212
|
|
|
|
213
|
|
|
|
214
|
|
|
def generate_all_scoring(words):
    """Run every scoring method over a set of words.

    Args:
        words (list) - the list of words.

    Returns:
        words (dict) - the scores, keyed by scoring name
            ('dmetaphone', 'soundex', 'nysiis' and 'grade').
    """
    report = {}
    report['dmetaphone'] = score_dmetaphone(words)
    report['soundex'] = score_soundex(words)
    report['nysiis'] = score_nysiis(words)
    report['grade'] = score_names_overall(words)
    return report
229
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a problem with the Pylint configuration. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one such file in each sub-folder.