namebot.score_length() - Code Metrics - Inspection of "Clean up scoring module, add docstrings" - christabor/namebot - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( fa4dad...a451dc )

by Chris

created 2016-03-08 04:34 UTC

namebot.score_length() F

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	11
dl	0
loc	25
rs	3.1764

How to fix Complexity

"""Provides various scoring methods for word strength."""

import fuzzy
import re


def score_dmetaphone(words):
    """Pair every word with its double metaphone encoding.

    Args:
        words (list) - the words to encode.
    Returns:
        scores (list) - one '<lowercased word>: <encoding>' string per
            input word, in the same order.
    """
    # Build the encoder once and reuse it for the whole list.
    encode = fuzzy.DMetaphone()
    return ['{}: {}'.format(entry.lower(), encode(entry)) for entry in words]


def score_soundex(words):
    """Pair every word with its soundex encoding.

    Args:
        words (list) - the words to encode.
    Returns:
        scores (list) - one '<lowercased word>: <encoding>' string per
            input word, in the same order.
    """
    # The encoder is constructed with 4 (presumably the code length --
    # confirm against the fuzzy documentation) and shared by all words.
    encode = fuzzy.Soundex(4)
    return ['{}: {}'.format(entry.lower(), encode(entry)) for entry in words]


def score_nysiis(words):
    """Pair every word with its nysiis encoding.

    Args:
        words (list) - the words to encode.
    Returns:
        scores (list) - one '<lowercased word>: <encoding>' string per
            input word, in the same order.
    """
    return [
        '{}: {}'.format(entry.lower(), fuzzy.nysiis(entry))
        for entry in words
    ]


def score_length(word):
    """Return a score, 0-5, for the length of the word.

    Really long or really short words get a lower score. There is no
    hard science behind the bands; they are a rule of thumb:

        length    score
        ------    -----
        empty       0
        1-4         3
        5-9         5
        10-15       4
        16-20       2
        21+         1

    Args:
        word (string) - the name; None or an empty string scores 0.
    Returns:
        score (int) - the length score.
    """
    if not word:
        return 0
    length = len(word)
    # Check from longest to shortest so each test only needs one bound.
    if length > 20:
        return 1
    if length > 15:
        return 2
    if length >= 10:
        return 4
    if length > 4:
        return 5
    return 3


def bounded(num, start, end):
    """Return True when `num` lies in the closed interval [start, end]."""
    return start <= num <= end


def score_pronounceability(word):
    """Get the ratio of vowels to consonants, a very basic measurement.

    Only ASCII letters and digits are considered; everything else is
    stripped first. Vowels are a/e/i/o/u in either case, and every other
    remaining character (including digits) counts as a consonant.

    The ratio is the smaller count divided by the larger one, so it is
    always between 0 and 1. Equal counts (ratio 1.0) are treated as
    perfect. Other ratios are mapped onto the 1-5 scale below; band edges
    are inclusive and the first matching row wins.

    The 1-5 scale translation:
    ---------------------------------------------------------------
    0.0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   1.0
    ---------------------------------------------------------------
    0      1     2     3     4     5     4     3     2     1      5
    ---------------------------------------------------------------

    Args:
        word (string) - the name
    Returns:
        score (int) - the final pronounceability score; 0 when the word is
            empty, or has no vowels or no consonants after stripping.
    """
    if not word:
        return 0
    cleaned = re.sub(r'[^a-zA-Z0-9]', '', word)
    vowels = len(re.findall(r'[aeiou]', cleaned, re.IGNORECASE))
    consonants = len(cleaned) - vowels
    # Compare by value: an identity test against 0.0 never matches, which
    # let an all-punctuation word fall through to a division by zero.
    if vowels == 0 or consonants == 0:
        return 0
    ratio = float(min(vowels, consonants)) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # (low, high, low, high, score): two inclusive ranges per score,
    # checked in this order so shared edges resolve to the earlier row.
    bands = (
        (0.0, 0.1, 0.9, 1.0, 1),
        (0.1, 0.2, 0.8, 0.9, 2),
        (0.2, 0.3, 0.7, 0.8, 3),
        (0.3, 0.4, 0.6, 0.7, 4),
        (0.4, 0.5, 0.5, 0.6, 5),
    )
    for low_a, high_a, low_b, high_b, score in bands:
        if low_a <= ratio <= high_a or low_b <= ratio <= high_b:
            return score
    return 0


def score_simplicity(word):
    """Determine how simple the word is.

    Simple is defined as the number of separate words, where a word is an
    unbroken run of ASCII letters in either case. Digits, whitespace and
    punctuation separate words but are never counted as words themselves.
    A higher score is better: one word scores 5, and each extra word
    costs a point down to a floor of 1.

    Args:
        word (string) - the name
    Returns:
        score (int) - the final simplicity score; 0 when the input is
            empty or contains no letters at all.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    # Count letter runs rather than splitting: splitting yields empty
    # strings for leading, trailing or repeated separators, and those
    # used to be counted as words.
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    # 1 word -> 5, 2 -> 4, 3 -> 3, 4 -> 2, 5 or more -> 1.
    return max(1, 6 - word_count)


def score_name_overall(word):
    """Combine the individual scores into a single figure capped at 100.

    The length, pronounceability and simplicity scores are added
    together, multiplied by ten, rounded, and then limited to 100.
    The closer to 100, the better.

    NOTE(review): a combined raw score of 10 or more already reaches the
    cap, so many names report exactly 100 -- confirm whether scaling by
    the true maximum was intended instead.

    Args:
        word (string) - the name
    Returns:
        score - the rounded combined score, never more than 100.
    """
    components = (
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    )
    scaled = round(sum(components) * 10)
    # Anything above the ceiling collapses to exactly 100.
    return min(scaled, 100)


def score_names_overall(words):
    """Score all names.

    Args:
        words (list) - the list of words.
    Returns:
        words (list) - a list of (score, word) tuples, one per input word
            and in the same order, where score comes from
            `score_name_overall`.
    """
    return [(score_name_overall(word), word) for word in words]


def generate_all_scoring(words):
    """Run every scoring method over the same set of words.

    Args:
        words (list) - the list of words.
    Returns:
        words (dict) - the result of each scoring function, keyed by
            'dmetaphone', 'soundex', 'nysiis' and 'grade'.
    """
    # Listed in the order the scorers are invoked.
    scorers = (
        ('dmetaphone', score_dmetaphone),
        ('soundex', score_soundex),
        ('nysiis', score_nysiis),
        ('grade', score_names_overall),
    )
    return {label: scorer(words) for label, scorer in scorers}


1			"""Provides various scoring methods for word strength."""
2
3			import fuzzy
4			import re
5
6
7			def score_dmetaphone(words):
8			"""Score words using the double metaphone algorithm.
9
10			Args:
11			words (list) - the list of words.
12			Returns:
13			scores (list) - the scored words
14			"""
15			scores = []
16			dmeta = fuzzy.DMetaphone()
17			for word in words:
18			scored = '{}: {}'.format(word.lower(), dmeta(word))
19			scores.append(scored)
20			return scores
21
22
23			def score_soundex(words):
24			"""Score words using the soundex algorithm.
25
26			Args:
27			words (list) - the list of words.
28			Returns:
29			scores (list) - the scored words
30			"""
31			scores = []
32			soundex = fuzzy.Soundex(4)
33			for word in words:
34			scored = '{}: {}'.format(word.lower(), soundex(word))
35			scores.append(scored)
36			return scores
37
38
39			def score_nysiis(words):
40			"""Score words using the nysiis algorithm.
41
42			Args:
43			words (list) - the list of words.
44			Returns:
45			scores (list) - the scored words
46			"""
47			scores = []
48			for word in words:
49			scored = '{}: {}'.format(word.lower(), fuzzy.nysiis(word))
50			scores.append(scored)
51			return scores
52
53
54			def score_length(word):
55			"""Return a score, 1-5, of the length of the word.
56
57			Really long, or really short words get a lower score.
58			There is no hard science, but popular opinion suggests
59			that a word somewhere between 8-15 letters is optimal.
60			"""
61			if not word or len(word) == 0:
62			return 0
63			_len = len(word)
64			# 20+
65			if _len > 20:
66			return 1
67			# 15-20
68			elif _len > 15 and _len <= 20:
69			return 2
70			# 1-4
71			elif _len <= 4:
72			return 3
73			# 10-15
74			elif _len >= 10 and _len <= 15:
75			return 4
76			# 5-10
77			elif _len > 4 and _len < 10:
78			return 5
79
80
81			def bounded(num, start, end):
82			"""Determine if a number is within the bounds of `start` and `end`."""
83			return num >= start and num <= end
84
85
86			def score_pronounceability(word):
87			"""Get the ratio of vowels to consonants, a very basic measurement.
88
89			Half vowels and half consonants indicates a highly pronounceable word.
90			For example, 0.5 / 0.5 = 1.0, so one is perfect, and lower is worse.
91
92			The 1-5 scale translation:
93			---------------------------------------------------------------
94			0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
95			---------------------------------------------------------------
96			0 1 2 3 4 5 4 3 2 1 5
97			---------------------------------------------------------------
98
99			Args:
100			word (string) - the name
101			Returns:
102			score (int) - the final pronounceability score
103			"""
104			if not word or len(word) == 0:
105			return 0
106			word = re.sub(r'[^a-zA-Z0-9]', '', word)
107			re_vowels = re.compile(r'[a\|e\|i\|o\|u]')
108			re_cons = re.compile(r'[^a\|e\|i\|o\|u]')
109			vowels = float(len(re.findall(re_vowels, word)))
110			consonants = float(len(re.findall(re_cons, word)))
111			if vowels is 0.0 or consonants is 0.0:
112			return 0
113			if vowels < consonants:
114			ratio = vowels / consonants
115			else:
116			ratio = consonants / vowels
117			if ratio == 0.0:
118			return 0
119			if ratio == 1.0:
120			return 5
121			if bounded(ratio, 0.0, 0.1) or bounded(ratio, 0.9, 1.0):
122			return 1
123			if bounded(ratio, 0.1, 0.2) or bounded(ratio, 0.8, 0.9):
124			return 2
125			if bounded(ratio, 0.2, 0.3) or bounded(ratio, 0.7, 0.8):
126			return 3
127			if bounded(ratio, 0.3, 0.4) or bounded(ratio, 0.6, 0.7):
128			return 4
129			if bounded(ratio, 0.4, 0.5) or bounded(ratio, 0.5, 0.6):
130			return 5
131			return 0
132
133
134			def score_simplicity(word):
135			"""Determine how simple the word is.
136
137			Simple is defined as the number of separate words.
138			In this case, higher is better, indicating a better score.
139
140			Args:
141			word (string) - the name
142			Returns:
143			score (int) - the final simplicity score
144
145			>>> score_simplicity('the cat in the hat')
146			>>> 1
147			>>> score_simplicity('facebook')
148			>>> 5
149			"""
150			if not word or len(word) == 0:
151			return 0
152			word_count = len(re.split(r'[^a-z]', word))
153			if word_count == 1:
154			return 5
155			if word_count < 3:
156			return 4
157			if word_count < 4:
158			return 3
159			if word_count < 5:
160			return 2
161			# After 4+ words, the name has a very poor score.
162			return 1
163
164
165			def score_name_overall(word):
166			"""Score the name using separate scoring functions, then normalize to 100.
167
168			This method gives an overall intuitive score.
169			The closer to 100%, the better.
170
171			Args:
172			word (string) - the name
173			Returns:
174			score (float) - the final name score
175			"""
176			length = score_length(word)
177			pronounceability = score_pronounceability(word)
178			simplicity = score_simplicity(word)
179			_scores = sum([length, pronounceability, simplicity])
180			score = round(_scores * 10)
181			# cut off at 100%
182			if score > 100:
183			return 100
184			return score
185
186
187			def score_names_overall(words):
188			"""Score all names.
189
190			Args:
191			words (list) - the list of words.
192			Returns:
193			words (list) - a list of tuples, with the score and word.
194			"""
195			new = []
196			for k, word in enumerate(words):
197			new.append((score_name_overall(word), word))
198			return new
199
200
201			def generate_all_scoring(words):
202			"""Return all scoring methods for a set of words.
203
204			Args:
205			words (list) - the list of words.
206			Returns:
207			words (dict) - the scores, keyed by scoring name.
208			"""
209			return {
210			'dmetaphone': score_dmetaphone(words),
211			'soundex': score_soundex(words),
212			'nysiis': score_nysiis(words),
213			'grade': score_names_overall(words)
214			}
215

christabor / namebot

Push — master ( fa4dad...a451dc )

namebot.score_length() F

Complexity

Size

Duplication

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like