"""Helpers to normalize inputs and text."""

import re
import string
from collections import defaultdict

from nltk.corpus import stopwords

from pattern.vector import PORTER
from pattern.vector import stem

import settings as namebot_settings


def flatten(lst):
    """Flatten a list with arbitrary levels of nesting.

    CREDIT: http://stackoverflow.com/questions/10823877/
    what-is-the-fastest-way-to-flatten-arbitrarily-nested-lists-in-python
    Changes made include:
    1. Adding error handling,
    2. Renaming variables,
    3. Using `any` instead of `or`.
    See http://creativecommons.org/licenses/by-sa/3.0/ for specific details.

    Args:
        lst (list): The nested list.

    Returns:
        (generator): The new flattened list of words.

    """
    # Tuples reach this function through the recursion below, so accept
    # them here; bail out early on any other type instead of crashing
    # mid-iteration.
    if not isinstance(lst, (list, tuple)):
        yield []
        return
    for i in lst:
        if any([isinstance(i, list), isinstance(i, tuple)]):
            for j in flatten(i):
                yield j
        else:
            yield i
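
# Illustrative usage of flatten (a hedged sketch; the sample values are
# invented for demonstration):
#   list(flatten(['a', ['b', ('c', 'd')], 'e']))
#   # -> ['a', 'b', 'c', 'd', 'e']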


def remove_odd_sounding_words(words):
    """Remove odd-sounding word combinations via regular expressions.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with words cleaned.

    """
    odd_regexes = [
        # Three to six consecutive vowels at the start of a word.
        re.compile(r'^[aeiouy]{3,6}'),
        # bk, ck, dk, gk, etc...
        re.compile(r'\b[^aeiouys]k|zt|ksd|kd|zhr'),
        re.compile(r'\bzt|ksd|kd|zhr')
    ]
    if words is None or len(words) == 0:
        return words
    # Loop through any number of regexes
    # and keep a word only if no matches exist.
    return [word for word in words
            if not any(regex.match(word) for regex in odd_regexes)]
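
# Illustrative usage (hedged; sample words chosen to trip the patterns):
#   remove_odd_sounding_words(['bkad', 'ztep', 'lovely'])  # -> ['lovely']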


def stem_words(words):
    """Stem words to their base linguistic stem to remove redundancy.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with words stemmed.

    """
    return [stem(word, stemmer=PORTER) for word in words]
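
# Illustrative usage (hedged; exact stems depend on pattern's Porter
# stemmer implementation):
#   stem_words(['cats', 'running'])  # -> ['cat', 'run']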


def remove_stop_words(words):
    """Remove all stop words.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with stopwords removed.

    """
    stop_words = stopwords.words('english')
    # http://stackoverflow.com/questions/5486337/
    # how-to-remove-stop-words-using-nltk-or-python
    return [word for word in words if word.lower() not in stop_words]
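
# Illustrative usage (hedged; requires the NLTK stopwords corpus,
# e.g. via nltk.download('stopwords')):
#   remove_stop_words(['The', 'quick', 'brown', 'fox'])
#   # -> ['quick', 'brown', 'fox']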


def remove_bad_words(words):
    """Remove naughty words that might come from wordnet synsets and lemmata.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with bad words removed.

    """
    bad_words = ["nigger", "wop",
                 "kike", "faggot",
                 "fuck", "pussy", "cunt"]

    return [word for word in words if word.lower() not in bad_words]
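
# Illustrative usage (hedged; matching is case-insensitive, so clean
# words pass through unchanged):
#   remove_bad_words(['sunny', 'meadow'])  # -> ['sunny', 'meadow']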


def filter_words(words):
    """Filter words by default min/max settings in the settings module.

    Args:
        words (list): The list of words.

    Returns:
        list: The filtered words.

    """
    new_arr = []
    for word in words:
        if ' ' not in word:
            lte = len(word) <= namebot_settings.MAX_LENGTH
            gte = len(word) >= namebot_settings.MIN_LENGTH
            if all([lte, gte]):
                new_arr.append(word)
        else:
            # Check each chunk of a spaced word separately,
            # then re-join the chunks that pass.
            split_join = []
            for chunk in word.split(' '):
                lte = len(chunk) <= namebot_settings.SPACED_MAX_LENGTH
                gte = len(chunk) >= namebot_settings.MIN_LENGTH
                if all([lte, gte]):
                    split_join.append(chunk)
            new_arr.append(' '.join(split_join))
    return new_arr
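
# Illustrative usage (hedged; assumes hypothetical settings values
# MIN_LENGTH=4 and MAX_LENGTH=7):
#   filter_words(['hi', 'namely', 'extraordinary'])  # -> ['namely']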


def uniquify(words):
    """Remove duplicates from a list.

    Note: dict-based deduplication does not preserve the original
    word order.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with duplicates removed.

    """
    return {}.fromkeys(words).keys() if words is not None else words
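
# Illustrative usage (hedged; result order may vary):
#   uniquify(['a', 'b', 'a'])  # -> ['a', 'b']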


def clean_sort(words):
    """Clean and prep words for use by the various techniques.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with words cleaned and sorted.

    """
    if isinstance(words, basestring):
        return words
    # Punctuation and digits to delete from each word.
    chars = '!"#$%\'()*+,._/:;<=>?@[\\]^`{|}~01234567890'
    if words is not None:
        try:
            words = [word.strip().lower().translate(
                string.maketrans('', ''),
                chars) for word in words if len(word) > 1]
        except TypeError:
            pass
    return words
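
# Illustrative usage (hedged; Python 2 only, given `basestring` and the
# two-argument str.translate):
#   clean_sort(['  Word!  ', 'a', 'test123'])  # -> ['word', 'test']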


def chop_duplicate_ends(word):
    """Remove duplicate letters on either end, if they are adjacent.

    Args:
        word (str): The word to chop.

    Returns:
        str: The word with any duplicate ends removed.

    """
    # Words shorter than two letters have nothing to chop.
    if len(word) < 2:
        return word
    if word[0] == word[1]:
        word = word[1:]
    if word[-2:-1] == word[-1:]:
        word = word[:-1]
    return word
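
# Illustrative usage (hedged):
#   chop_duplicate_ends('aapplee')  # -> 'apple'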


def key_words_by_pos_tag(words):
    """Key words by pos tag name, as produced by running pos_tag on a list.

    Args:
        words (list): The list of words, where each item is a 2-tuple
            of (word, pos tag).

    Returns:
        dict: A dictionary keyed by pos tag, with values as a list of
            the words matching that tag.

    """
    alltags = defaultdict(list)
    for word, pos in words:
        alltags[pos].append(word)
    return alltags
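
# Illustrative usage (hedged; tags as produced by nltk.pos_tag):
#   key_words_by_pos_tag([('run', 'VB'), ('dog', 'NN'), ('cat', 'NN')])
#   # -> {'VB': ['run'], 'NN': ['dog', 'cat']} (as a defaultdict)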