"""Provides various scoring methods for word strength."""

import re

import fuzzy

# Shared phonetic encoders: created once when the module is imported and
# reused by score_dmetaphone() and score_soundex() below.
dmeta = fuzzy.DMetaphone()
soundex = fuzzy.Soundex(4)  # 4 is the argument handed to the Soundex constructor


def score_dmetaphone(words):
    """Score words using the double metaphone algorithm.

    Every word is handed to the module-level ``dmeta`` encoder. The encoder's
    result is unpacked into exactly two values, and the entry is rendered as
    ``'<word>:<first>:<second>'``.

    :param words (list): the list of words.
    :rtype scores (list): the scored words
    """
    scores = []
    for word in words:
        # Unpacking (rather than indexing) fails loudly if the encoder ever
        # returns something other than a pair.
        primary, secondary = dmeta(word)
        scores.append('{0}:{1}:{2}'.format(word, primary, secondary))
    return scores


def score_soundex(words):
    """Score words using the soundex algorithm.

    Each entry has the form ``'<lower-cased word>: <code>'``, where the code
    comes from the module-level ``soundex`` encoder applied to the word as
    given (not lower-cased).

    :param words (list): the list of words.
    :rtype scores (list): the scored words
    """
    return ['{}: {}'.format(w.lower(), soundex(w)) for w in words]


def score_nysiis(words):
    """Score words using the nysiis algorithm.

    Each entry has the form ``'<lower-cased word>: <code>'``, where the code
    is ``fuzzy.nysiis`` applied to the word as given (not lower-cased).

    :param words (list): the list of words.
    :rtype scores (list): the scored words
    """
    return ['{}: {}'.format(w.lower(), fuzzy.nysiis(w)) for w in words]


def score_length(word):
    """Return a score, 0-5, of the length of the word.

    Really long, or really short words get a lower score.
    There is no hard science, but popular opinion suggests
    that a word somewhere between 8-15 letters is optimal.

    The mapping actually applied is:

        empty / None   -> 0
        more than 20   -> 1
        16 to 20       -> 2
        1 to 4         -> 3
        10 to 15       -> 4
        5 to 9         -> 5

    :param word (str): The word to score.
    :rtype score (int): The resulting score.
    """
    if not word:
        return 0
    length = len(word)
    if length > 20:
        return 1
    if length > 15:
        # 16-20; the upper bound is implied by the check above.
        return 2
    if length <= 4:
        return 3
    if length >= 10:
        # 10-15; lengths above 15 have already returned.
        return 4
    # Only 5-9 remains.
    return 5


def bounded(num, start, end):
    """Determine if a number is within the bounds of `start` and `end`.

    Both bounds are inclusive.

    :param num (int): An integer.
    :param start (int): A start minimum.
    :param end (int): An end maximum.
    :rtype is_bounded (bool): Whether number is bounded by start and end.
    """
    return start <= num <= end


def score_pronounceability(word):
    """Get the ratio of vowels to consonants, a very basic measurement.

    Half vowels and half consonants indicates a highly pronounceable word.
    For example, 0.5 / 0.5 = 1.0, so one is perfect, and lower is worse.

    The ratio is always the smaller count divided by the larger one, so it
    lies between 0 and 1. Only ASCII letters and digits are considered;
    ``a e i o u`` in either case count as vowels and every other remaining
    character (digits included) counts as a consonant.

    The 1-5 scale translation:

    0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
    0   1   2   3   4   5   4   3   2   1   5

    :param word (string): The name
    :rtype (int): The final pronounceability score; 0 when the word is
        empty or has no vowels or no consonants after filtering.
    """
    if not word:
        return 0
    word = re.sub(r'[^a-zA-Z0-9]', '', word)
    vowels = len(re.findall(r'[aeiouAEIOU]', word))
    consonants = len(word) - vowels
    # Compare by value: an identity test against a float literal never
    # matches, which let 0/0 through for input such as '!!!'.
    if vowels == 0 or consonants == 0:
        return 0
    ratio = float(min(vowels, consonants)) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # Bands are checked from the outside in, so a ratio sitting exactly on a
    # shared edge (e.g. 0.2) gets the lower of the two neighbouring scores.
    bands = (
        (0.1, 0.9, 1),
        (0.2, 0.8, 2),
        (0.3, 0.7, 3),
        (0.4, 0.6, 4),
    )
    for low, high, score in bands:
        if ratio <= low or ratio >= high:
            return score
    # Strictly between 0.4 and 0.6.
    return 5


def score_simplicity(word):
    """Determine how simple the word is.

    Simple is defined as the number of separate words.
    In this case, higher is better, indicating a better score.

    A "word" is a run of ASCII letters in either case; anything else
    (spaces, digits, punctuation) separates words, and repeated or
    leading/trailing separators do not create extra words.

    :param word (string): the name
    :rtype score (int): the final simplicity score; 0 when the input is
        empty or contains no letters at all.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    # 1 word -> 5, 2 -> 4, 3 -> 3, 4 -> 2; five or more words bottom out at 1.
    return max(1, 6 - word_count)


def score_name_overall(word):
    """Score the name using separate scoring functions, then normalize to 100.

    This method gives an overall intuitive score.
    The closer to 100%, the better.

    The length, pronounceability and simplicity scores are added together,
    multiplied by 10 and rounded; anything above 100 is reported as 100.

    :param word (string): the name
    :rtype score (float): the final name score
    """
    parts = (
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    )
    score = round(sum(parts) * 10)
    # cut off at 100%
    return min(score, 100)


def score_names_overall(words):
    """Score all names.

    :param words (list): the list of words.
    :rtype words (list): a list of tuples, with the score and word.
    """
    return [(score_name_overall(w), w) for w in words]


def generate_all_scoring(words):
    """Return all scoring methods for a set of words.

    :param words (list): the list of words.
    :rtype words (dict): the scores, keyed by scoring name
        (``dmetaphone``, ``soundex``, ``nysiis`` and ``grade``).
    """
    # Each scorer walks the input once; materialise it so a one-shot
    # iterator is not used up by the first scorer.
    words = list(words)
    return {
        'dmetaphone': score_dmetaphone(words),
        'soundex': score_soundex(words),
        'nysiis': score_nysiis(words),
        'grade': score_names_overall(words),
    }
# This can be caused by one of the following:
# 1. Missing dependencies
#    This error could indicate a Pylint configuration issue. Make sure that
#    your libraries are available by adding the necessary install commands.
# 2. Missing __init__.py files
#    This error could also result from missing __init__.py files in your
#    module folders. Make sure that you place one such file in each sub-folder.