score_simplicity() - Code Metrics - Inspection of "Update README.md" - christabor/namebot - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( cc4906...fdc087 )

by Chris

created 2016-05-14 21:26 UTC

score_simplicity() C

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	0	Features	1

Metric	Value
cc	7
c	2
b	0
f	1
dl	0
loc	30
rs	5.5

"""Provides various scoring methods for word strength."""

import re

import fuzzy
# Suggested Scrutinizer configuration for resolving the `fuzzy` import:
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

# Shared encoder instances, built once at import time; score_dmetaphone and
# score_soundex call them for every word.
dmeta = fuzzy.DMetaphone()
# NOTE(review): the argument 4 is presumably the soundex code length --
# confirm against the fuzzy library documentation.
soundex = fuzzy.Soundex(4)


def score_dmetaphone(words):
    """Build a double metaphone score string for each word.

    Args:
        words (list) - the words to encode.

    Returns:
        scores (list) - one 'word:first:second' string per input word,
            where the last two fields are the pair returned by `dmeta`.
    """
    def _describe(term):
        # `dmeta` must yield exactly two values; unpacking enforces that.
        first, second = dmeta(term)
        return '{0}:{1}:{2}'.format(term, first, second)

    return [_describe(term) for term in words]


def score_soundex(words):
    """Pair each lowercased word with its soundex encoding.

    Args:
        words (list) - the words to encode.

    Returns:
        scores (list) - one 'word: code' string per input word; the word
            is lowercased for display, the code comes from the original.
    """
    scores = []
    for entry in words:
        label = entry.lower()
        scores.append('{}: {}'.format(label, soundex(entry)))
    return scores


def score_nysiis(words):
    """Pair each lowercased word with its nysiis encoding.

    Args:
        words (list) - the words to encode.

    Returns:
        scores (list) - one 'word: code' string per input word; the word
            is lowercased for display, the code comes from the original.
    """
    template = '{}: {}'
    scores = []
    for entry in words:
        label = entry.lower()
        scores.append(template.format(label, fuzzy.nysiis(entry)))
    return scores


def score_length(word):
    """Rate a word by its length on a 1-5 scale (0 for empty input).

    Very long and very short words are rated lower. The bands are:
    1-4 letters -> 3, 5-9 -> 5, 10-15 -> 4, 16-20 -> 2, 21+ -> 1.

    Args:
        word (str): The word to score.

    Returns:
        score (int): The resulting score, or 0 when `word` is empty/None.
    """
    if not word:
        return 0
    size = len(word)
    if size == 0:
        return 0
    # (inclusive upper limit, score) pairs, checked from shortest to longest.
    bands = ((4, 3), (9, 5), (15, 4), (20, 2))
    for limit, rating in bands:
        if size <= limit:
            return rating
    # Anything longer than the last band gets the lowest rating.
    return 1


def bounded(num, start, end):
    """Check whether `num` lies in the closed interval [start, end].

    Args:
        num (int): The value to test.
        start (int): The inclusive lower limit.
        end (int): The inclusive upper limit.

    Returns:
        is_bounded (bool): True when start <= num <= end.
    """
    return start <= num <= end


def score_pronounceability(word):
    """Score a word by its vowel/consonant balance, a very basic measurement.

    Only ASCII letters and digits are considered; everything else is
    stripped first. Vowels are a, e, i, o, u in either case, and every other
    remaining character (including digits) counts as a consonant.

    The ratio is the smaller count divided by the larger one, so it always
    falls in (0, 1]. The 1-5 scale translation of that ratio:
    ---------------------------------------------------------------
    0.0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   1.0
    ---------------------------------------------------------------
    0      1     2     3     4     5     4     3     2     1      5
    ---------------------------------------------------------------
    Values between the listed points take the score of the nearer outer
    edge: (0.1, 0.2] and [0.8, 0.9) -> 2, and so on; (0.4, 0.6) -> 5.

    Args:
        word (string) - the name

    Returns:
        score (int) - the final pronounceability score; 0 when the word is
            empty, or contains no vowels or no consonants.
    """
    if not word:
        return 0
    # Lowercase so that 'A' is recognised as a vowel just like 'a'.
    cleaned = re.sub(r'[^a-zA-Z0-9]', '', word).lower()
    vowels = float(len(re.findall(r'[aeiou]', cleaned)))
    consonants = float(len(cleaned)) - vowels
    # Compare by value: an identity test against 0.0 never matches and would
    # let an all-punctuation word reach a 0.0 / 0.0 division.
    if vowels == 0.0 or consonants == 0.0:
        return 0
    ratio = min(vowels, consonants) / max(vowels, consonants)
    if ratio == 1.0:
        return 5
    # Bands are tested from the outermost inwards, so each check only needs
    # the outer edge of its band.
    if ratio <= 0.1 or ratio >= 0.9:
        return 1
    if ratio <= 0.2 or ratio >= 0.8:
        return 2
    if ratio <= 0.3 or ratio >= 0.7:
        return 3
    if ratio <= 0.4 or ratio >= 0.6:
        return 4
    return 5


def score_simplicity(word):
    """Determine how simple the word is.

    Simple is defined as the number of separate words, where a word is a
    run of ASCII letters in either case; any other character acts as a
    separator. Fewer words give a higher (better) score.

    Args:
        word (string) - the name

    Returns:
        score (int) - the final simplicity score: 5 for one word down to 1
            for five or more, and 0 when the input is empty or has no
            letters at all.

    >>> score_simplicity('the cat in the hat')
    1
    >>> score_simplicity('facebook')
    5
    """
    if not word:
        return 0
    # Count runs of letters rather than splitting on separators, so that
    # capitals, repeated separators and leading/trailing separators do not
    # inflate the word count.
    word_count = len(re.findall(r'[a-zA-Z]+', word))
    if word_count == 0:
        return 0
    # 1 word -> 5, 2 -> 4, 3 -> 3, 4 -> 2; five or more words bottom out at 1.
    return max(1, 6 - word_count)


def score_name_overall(word):
    """Combine the individual name scores into one figure capped at 100.

    The length, pronounceability and simplicity scores are added together,
    multiplied by ten and rounded. The closer to 100, the better.

    Args:
        word (string) - the name

    Returns:
        score (float) - the final name score, never above 100
    """
    parts = (
        score_length(word),
        score_pronounceability(word),
        score_simplicity(word),
    )
    total = round(sum(parts) * 10)
    # Anything beyond 100 is clamped to exactly 100.
    return 100 if total > 100 else total


def score_names_overall(words):
    """Compute the overall score for every name.

    Args:
        words (list) - the names to score.

    Returns:
        words (list) - (score, name) tuples in the same order as the input.
    """
    graded = []
    for name in words:
        graded.append((score_name_overall(name), name))
    return graded


def generate_all_scoring(words):
    """Run every scoring method over a set of words.

    Args:
        words (list) - the words to score.

    Returns:
        words (dict) - each method's results, stored under the keys
            'dmetaphone', 'soundex', 'nysiis' and 'grade'.
    """
    report = {}
    report['dmetaphone'] = score_dmetaphone(words)
    report['soundex'] = score_soundex(words)
    report['nysiis'] = score_nysiis(words)
    report['grade'] = score_names_overall(words)
    return report


Push — master ( cc4906...fdc087 )

score_simplicity() C

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1			"""Provides various scoring methods for word strength."""
2
3			import re
4
5			import fuzzy
			0 ignored issues – show Configuration introduced 2016-03-10 08:51 UTC by Report Bug Copy Issue Report The import `fuzzy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
6
7			dmeta = fuzzy.DMetaphone()
8			soundex = fuzzy.Soundex(4)
9
10
11			def score_dmetaphone(words):
12			"""Score words using the double metaphone algorithm.
13
14			Args:
15			words (list) - the list of words.
16
17			Returns:
18			scores (list) - the scored words
19			"""
20			scores = []
21			for word in words:
22			res, output = dmeta(word)
23			scores.append('{0}:{1}:{2}'.format(word, res, output))
24			return scores
25
26
27			def score_soundex(words):
28			"""Score words using the soundex algorithm.
29
30			Args:
31			words (list) - the list of words.
32
33			Returns:
34			scores (list) - the scored words
35			"""
36			return ['{}: {}'.format(w.lower(), soundex(w)) for w in words]
37
38
39			def score_nysiis(words):
40			"""Score words using the nysiis algorithm.
41
42			Args:
43			words (list) - the list of words.
44
45			Returns:
46			scores (list) - the scored words
47			"""
48			return ['{}: {}'.format(w.lower(), fuzzy.nysiis(w)) for w in words]
49
50
51			def score_length(word):
52			"""Return a score, 1-5, of the length of the word.
53
54			Really long, or really short words get a lower score.
55			There is no hard science, but popular opinion suggests
56			that a word somewhere between 8-15 letters is optimal.
57
58			Args:
59			word (str): The word to score.
60
61			Returns:
62			score (int): The resulting score.
63			"""
64			if not word or len(word) == 0:
65			return 0
66			_len = len(word)
67			# 20+
68			if _len > 20:
69			return 1
70			# 15-20
71			elif _len > 15 and _len <= 20:
72			return 2
73			# 1-4
74			elif _len <= 4:
75			return 3
76			# 10-15
77			elif _len >= 10 and _len <= 15:
78			return 4
79			# 5-10
80			elif _len > 4 and _len < 10:
81			return 5
82
83
84			def bounded(num, start, end):
85			"""Determine if a number is within the bounds of `start` and `end`.
86
87			Args:
88			num (int): An integer.
89			start (int): A start minimum.
90			end (int): An end maximum.
91
92			Returns:
93			is_bounded (bool): Whether number is bounded by start and end.
94			"""
95			return num >= start and num <= end
96
97
98			def score_pronounceability(word):
99			"""Get the ratio of vowels to consonants, a very basic measurement.
100
101			Half vowels and half consonants indicates a highly pronounceable word.
102			For example, 0.5 / 0.5 = 1.0, so one is perfect, and lower is worse.
103
104			The 1-5 scale translation:
105			---------------------------------------------------------------
106			0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
107			---------------------------------------------------------------
108			0 1 2 3 4 5 4 3 2 1 5
109			---------------------------------------------------------------
110
111			Args:
112			word (string) - the name
113
114			Returns:
115			score (int) - the final pronounceability score
116			"""
117			if not word or len(word) == 0:
118			return 0
119			word = re.sub(r'[^a-zA-Z0-9]', '', word)
120			re_vowels = re.compile(r'[a|e|i|o|u]')
121			re_cons = re.compile(r'[^a|e|i|o|u]')
122			vowels = float(len(re.findall(re_vowels, word)))
123			consonants = float(len(re.findall(re_cons, word)))
124			if vowels is 0.0 or consonants is 0.0:
125			return 0
126			if vowels < consonants:
127			ratio = vowels / consonants
128			else:
129			ratio = consonants / vowels
130			if ratio == 0.0:
131			return 0
132			if ratio == 1.0:
133			return 5
134			if bounded(ratio, 0.0, 0.1) or bounded(ratio, 0.9, 1.0):
135			return 1
136			if bounded(ratio, 0.1, 0.2) or bounded(ratio, 0.8, 0.9):
137			return 2
138			if bounded(ratio, 0.2, 0.3) or bounded(ratio, 0.7, 0.8):
139			return 3
140			if bounded(ratio, 0.3, 0.4) or bounded(ratio, 0.6, 0.7):
141			return 4
142			if bounded(ratio, 0.4, 0.5) or bounded(ratio, 0.5, 0.6):
143			return 5
144			return 0
145
146
147			def score_simplicity(word):
148			"""Determine how simple the word is.
149
150			Simple is defined as the number of separate words.
151			In this case, higher is better, indicating a better score.
152
153			Args:
154			word (string) - the name
155
156			Returns:
157			score (int) - the final simplicity score
158
159			>>> score_simplicity('the cat in the hat')
160			>>> 1
161			>>> score_simplicity('facebook')
162			>>> 5
163			"""
164			if not word or len(word) == 0:
165			return 0
166			word_count = len(re.split(r'[^a-z]', word))
167			if word_count == 1:
168			return 5
169			if word_count < 3:
170			return 4
171			if word_count < 4:
172			return 3
173			if word_count < 5:
174			return 2
175			# After 4+ words, the name has a very poor score.
176			return 1
177
178
179			def score_name_overall(word):
180			"""Score the name using separate scoring functions, then normalize to 100.
181
182			This method gives an overall intuitive score.
183			The closer to 100%, the better.
184
185			Args:
186			word (string) - the name
187
188			Returns:
189			score (float) - the final name score
190			"""
191			length = score_length(word)
192			pronounceability = score_pronounceability(word)
193			simplicity = score_simplicity(word)
194			_scores = sum([length, pronounceability, simplicity])
195			score = round(_scores * 10)
196			# cut off at 100%
197			if score > 100:
198			return 100
199			return score
200
201
202			def score_names_overall(words):
203			"""Score all names.
204
205			Args:
206			words (list) - the list of words.
207
208			Returns:
209			words (list) - a list of tuples, with the score and word.
210			"""
211			return [(score_name_overall(w), w) for w in words]
212
213
214			def generate_all_scoring(words):
215			"""Return all scoring methods for a set of words.
216
217			Args:
218			words (list) - the list of words.
219
220			Returns:
221			words (dict) - the scores, keyed by scoring name.
222			"""
223			return {
224			'dmetaphone': score_dmetaphone(words),
225			'soundex': score_soundex(words),
226			'nysiis': score_nysiis(words),
227			'grade': score_names_overall(words)
228			}
229

christabor / namebot

Push — master ( cc4906...fdc087 )

score_simplicity() C

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files