Completed
Push — master ( fa4dad...a451dc )
by Chris
03:49
created

namebot.score_length()   F

Complexity

Conditions 11

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 11
dl 0
loc 25
rs 3.1764

How to fix   Complexity   

Complexity

Complex classes like namebot.score_length() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""Provides various scoring methods for word strength."""
2
3
import fuzzy
4
import re
5
6
7
def score_dmetaphone(words):
    """Pair every word with its double metaphone encoding.

    Args:
        words (list) - the list of words.
    Returns:
        scores (list) - one '<lowercased word>: <encoding>' string per word,
            in input order.
    """
    # A single encoder instance is reused for all words.
    encode = fuzzy.DMetaphone()
    return ['{}: {}'.format(item.lower(), encode(item)) for item in words]
21
22
23
def score_soundex(words):
    """Pair every word with its soundex encoding.

    Args:
        words (list) - the list of words.
    Returns:
        scores (list) - one '<lowercased word>: <encoding>' string per word,
            in input order.
    """
    # NOTE(review): the 4 is presumably the soundex code length -- confirm
    # against the fuzzy library documentation.
    encode = fuzzy.Soundex(4)
    return ['{}: {}'.format(item.lower(), encode(item)) for item in words]
37
38
39
def score_nysiis(words):
    """Pair every word with its nysiis encoding.

    Args:
        words (list) - the list of words.
    Returns:
        scores (list) - one '<lowercased word>: <encoding>' string per word,
            in input order.
    """
    return [
        '{}: {}'.format(item.lower(), fuzzy.nysiis(item))
        for item in words
    ]
52
53
54
def score_length(word):
    """Return a score, 0-5, for the length of the word.

    Really long, or really short words get a lower score.
    There is no hard science, but popular opinion suggests
    that a word somewhere between 8-15 letters is optimal.
    Note that the bands implemented here give the top score
    to 5-9 characters:

        length   score
        ------   -----
        empty      0
        1-4        3
        5-9        5
        10-15      4
        16-20      2
        21+        1

    Args:
        word (string) - the name
    Returns:
        score (int) - the length score; 0 for an empty or missing word
    """
    if not word:
        return 0
    length = len(word)
    # Bands are checked in ascending order, so each test only needs the
    # upper bound of its band.
    if length <= 4:
        return 3
    if length < 10:
        return 5
    if length <= 15:
        return 4
    if length <= 20:
        return 2
    return 1
79
80
81
def bounded(num, start, end):
    """Tell whether `num` lies in the closed interval [`start`, `end`]."""
    return start <= num <= end
84
85
86
def score_pronounceability(word):
    """Get the ratio of vowels to consonants, a very basic measurement.

    Half vowels and half consonants indicates a highly pronounceable word.
    The ratio is always the smaller count divided by the larger one, so it
    lies between 0.0 and 1.0.

    The 1-5 scale translation:
    ---------------------------------------------------------------
    0.0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   1.0
    ---------------------------------------------------------------
    0      1     2     3     4     5     4     3     2     1      5
    ---------------------------------------------------------------

    Only ASCII letters and digits are considered; every other character
    is dropped first. Vowels are a/e/i/o/u in either case. Everything
    else that remains (including digits and 'y') counts as a consonant.

    Args:
        word (string) - the name
    Returns:
        score (int) - the final pronounceability score; 0 when the word
            is empty or contains no vowels or no consonants
    """
    if not word:
        return 0
    letters = re.sub(r'[^a-zA-Z0-9]', '', word)
    vowels = len(re.findall(r'[aeiou]', letters, re.IGNORECASE))
    consonants = len(letters) - vowels
    # Compare by value: this also covers input that had no letters or
    # digits at all, which would otherwise divide zero by zero below.
    if vowels == 0 or consonants == 0:
        return 0
    ratio = float(min(vowels, consonants)) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # Each entry is (score, low edge, high edge): the score applies when
    # the ratio is at or below the low edge or at or above the high edge.
    # Bands are tested from the outside in, so the first match wins.
    bands = ((1, 0.1, 0.9), (2, 0.2, 0.8), (3, 0.3, 0.7), (4, 0.4, 0.6))
    for score, low_edge, high_edge in bands:
        if ratio <= low_edge or ratio >= high_edge:
            return score
    # Strictly between 0.4 and 0.6.
    return 5
132
133
134
def score_simplicity(word):
    """Determine how simple the word is.

    Simple is defined as the number of separate words.
    In this case, higher is better, indicating a better score.

    The name is split on every character that is not a lowercase
    ASCII letter, so spaces, digits, punctuation and uppercase letters
    all act as separators. The number of resulting segments maps to:

        segments   score
        --------   -----
        1            5
        2            4
        3            3
        4            2
        5+           1

    Args:
        word (string) - the name
    Returns:
        score (int) - the final simplicity score; 0 for an empty or
            missing word

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    segments = len(re.split(r'[^a-z]', word))
    # One point is lost per extra segment, bottoming out at 1.
    return max(1, 6 - segments)
163
164
165
def score_name_overall(word):
    """Combine the individual name scores into one figure capped at 100.

    The length, pronounceability and simplicity scores are added up,
    multiplied by ten and rounded; anything above 100 is reported as 100.
    The closer to 100%, the better.

    Args:
        word (string) - the name
    Returns:
        score (float) - the final name score
    """
    parts = [
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    ]
    total = round(sum(parts) * 10)
    # cut off at 100%
    return 100 if total > 100 else total
185
186
187
def score_names_overall(words):
    """Score all names.

    Args:
        words (list) - the list of words.
    Returns:
        words (list) - a list of (score, word) tuples, in input order,
            where score comes from `score_name_overall`.
    """
    return [(score_name_overall(word), word) for word in words]
199
200
201
def generate_all_scoring(words):
    """Run every scoring method over a set of words.

    Args:
        words (list) - the list of words.
    Returns:
        words (dict) - the result of each scoring function, stored under
            the keys 'dmetaphone', 'soundex', 'nysiis' and 'grade'.
    """
    results = {}
    results['dmetaphone'] = score_dmetaphone(words)
    results['soundex'] = score_soundex(words)
    results['nysiis'] = score_nysiis(words)
    results['grade'] = score_names_overall(words)
    return results
215