|
1
|
|
|
"""Provides various scoring methods for word strength.""" |
|
2
|
|
|
|
|
3
|
|
|
import re |
|
4
|
|
|
|
|
5
|
|
|
import fuzzy |
|
|
|
|
|
|
6
|
|
|
|
|
7
|
|
|
# Shared double metaphone encoder; score_dmetaphone calls it once per word.
dmeta = fuzzy.DMetaphone()
# Shared soundex encoder used by score_soundex.
# NOTE(review): the 4 is presumably the soundex code length -- confirm
# against the fuzzy documentation.
soundex = fuzzy.Soundex(4)
|
9
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
def score_dmetaphone(words):
    """Pair every word with its double metaphone encodings.

    Each word is handed to the module-level ``dmeta`` encoder; the result
    is unpacked into two values and rendered as ``word:first:second``.

    Args:
        words (list) - the words to encode.

    Returns:
        scores (list) - one formatted string per word, in input order.
    """
    def _render(word):
        # The encoder result must unpack into exactly two values.
        first, second = dmeta(word)
        return '{0}:{1}:{2}'.format(word, first, second)

    return [_render(word) for word in words]
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
def score_soundex(words):
    """Pair every word with its soundex code.

    The word is lower-cased for display only; the original spelling is
    what gets passed to the module-level ``soundex`` encoder.

    Args:
        words (list) - the words to encode.

    Returns:
        scores (list) - ``'<lower-cased word>: <code>'`` strings, in
            input order.
    """
    scores = []
    for word in words:
        label = word.lower()
        scores.append('{}: {}'.format(label, soundex(word)))
    return scores
|
37
|
|
|
|
|
38
|
|
|
|
|
39
|
|
|
def score_nysiis(words):
    """Pair every word with its nysiis code.

    The word is lower-cased for display only; the original spelling is
    what gets passed to ``fuzzy.nysiis``.

    Args:
        words (list) - the words to encode.

    Returns:
        scores (list) - ``'<lower-cased word>: <code>'`` strings, in
            input order.
    """
    def _render(name):
        return '{}: {}'.format(name.lower(), fuzzy.nysiis(name))

    return list(_render(name) for name in words)
|
49
|
|
|
|
|
50
|
|
|
|
|
51
|
|
|
def score_length(word):
    """Rate a word's length on a 1-5 scale (0 when there is no word).

    Very short and very long words are penalised. The bands are:

        1-4 letters    -> 3
        5-9 letters    -> 5
        10-15 letters  -> 4
        16-20 letters  -> 2
        21+ letters    -> 1

    Args:
        word (str): The word to score.

    Returns:
        score (int): The resulting score, or 0 for an empty/missing word.
    """
    if not word:
        return 0
    size = len(word)
    # Walk the bands in ascending order; each guard implies the lower
    # bound left over from the previous one.
    if size <= 4:
        return 3
    if size < 10:
        return 5
    if size <= 15:
        return 4
    if size <= 20:
        return 2
    return 1
|
82
|
|
|
|
|
83
|
|
|
|
|
84
|
|
|
def bounded(num, start, end):
    """Check whether `num` lies in the closed interval [`start`, `end`].

    Both endpoints are inclusive.

    Args:
        num (int): The number to test.
        start (int): The lower bound.
        end (int): The upper bound.

    Returns:
        is_bounded (bool): True when start <= num <= end.
    """
    return start <= num <= end
|
96
|
|
|
|
|
97
|
|
|
|
|
98
|
|
|
def score_pronounceability(word):
    """Score a word by the balance of its vowels and consonants.

    This is a very basic measurement: the smaller of the two counts is
    divided by the larger, giving a ratio in (0, 1]. An even split
    (ratio 1.0) is treated as perfectly pronounceable.

    Characters other than ASCII letters and digits are ignored. Matching
    is case-insensitive, so 'A' counts as a vowel; digits count on the
    consonant side.

    The 1-5 scale translation of the ratio:
    ---------------------------------------------------------------
    0.0  0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1.0
    ---------------------------------------------------------------
    0    1    2    3    4    5    4    3    2    1    5
    ---------------------------------------------------------------

    Args:
        word (string) - the name

    Returns:
        score (int) - the final pronounceability score; 0 when the word
            is empty or has no vowels or no consonants at all.
    """
    if not word:
        return 0
    cleaned = re.sub(r'[^a-zA-Z0-9]', '', word).lower()
    vowels = sum(1 for char in cleaned if char in 'aeiou')
    consonants = len(cleaned) - vowels
    # Compare by value: an identity test against a float literal never
    # matches, which would let an all-punctuation word divide by zero.
    if vowels == 0 or consonants == 0:
        return 0
    ratio = float(min(vowels, consonants)) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # Inclusive (low, high, score) bands. Order matters: a ratio sitting
    # exactly on a shared edge takes the first band that contains it.
    bands = (
        (0.0, 0.1, 1), (0.9, 1.0, 1),
        (0.1, 0.2, 2), (0.8, 0.9, 2),
        (0.2, 0.3, 3), (0.7, 0.8, 3),
        (0.3, 0.4, 4), (0.6, 0.7, 4),
        (0.4, 0.5, 5), (0.5, 0.6, 5),
    )
    for low, high, score in bands:
        if low <= ratio <= high:
            return score
    return 0
|
145
|
|
|
|
|
146
|
|
|
|
|
147
|
|
|
def score_simplicity(word):
    """Determine how simple the name is.

    Simplicity is measured by the number of separate words in the name,
    where a word is an unbroken run of ASCII letters (upper or lower
    case). Anything else -- spaces, digits, punctuation -- separates
    words. Fewer words give a higher (better) score:

        1 word -> 5, 2 words -> 4, 3 words -> 3, 4 words -> 2, 5+ -> 1

    Args:
        word (string) - the name

    Returns:
        score (int) - the final simplicity score; 0 when the name is
            empty or contains no letters at all.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    # Count letter runs instead of splitting on separators, so capital
    # letters and leading/trailing/repeated separators do not inflate
    # the word count.
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    if word_count == 1:
        return 5
    if word_count == 2:
        return 4
    if word_count == 3:
        return 3
    if word_count == 4:
        return 2
    # With 5 or more words the name gets the lowest score.
    return 1
|
177
|
|
|
|
|
178
|
|
|
|
|
179
|
|
|
def score_name_overall(word):
    """Combine the individual scores into one overall figure, capped at 100.

    The length, pronounceability and simplicity scores are added up,
    multiplied by 10 and rounded. Each part is at most 5, so the raw
    figure can reach 150; anything above 100 is clamped to 100.
    The closer to 100, the better.

    Args:
        word (string) - the name

    Returns:
        score - the final name score, never greater than 100.
    """
    parts = (
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    )
    # cut off at 100%
    return min(round(sum(parts) * 10), 100)
|
200
|
|
|
|
|
201
|
|
|
|
|
202
|
|
|
def score_names_overall(words):
    """Compute the overall score for every name.

    Args:
        words (list) - the list of words.

    Returns:
        words (list) - ``(score, word)`` tuples, in input order.
    """
    scored = []
    for name in words:
        scored.append((score_name_overall(name), name))
    return scored
|
212
|
|
|
|
|
213
|
|
|
|
|
214
|
|
|
def generate_all_scoring(words):
    """Run every scoring method over a set of words.

    Args:
        words (list) - the list of words.

    Returns:
        words (dict) - the results of each method, keyed by
            'dmetaphone', 'soundex', 'nysiis' and 'grade'.
    """
    results = {}
    results['dmetaphone'] = score_dmetaphone(words)
    results['soundex'] = score_soundex(words)
    results['nysiis'] = score_nysiis(words)
    results['grade'] = score_names_overall(words)
    return results
|
229
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue with Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one such file in each sub-folder.