"""Helpers to normalize inputs and text."""

import re
import string
from collections import defaultdict

from nltk.corpus import stopwords

from pattern.vector import PORTER
from pattern.vector import stem

import settings as namebot_settings


def flatten(lst):
    """Flatten a list with arbitrary levels of nesting.

    CREDIT: http://stackoverflow.com/questions/10823877/
    what-is-the-fastest-way-to-flatten-arbitrarily-nested-lists-in-python
    Changes made include:
    1. Adding error handling,
    2. Renaming variables,
    3. Using `any` instead of `or`.
    See http://creativecommons.org/licenses/by-sa/3.0/ for specific details.

    Args:
        lst (list): The nested list.

    Returns:
        (generator): The new flattened list of words.
    """
    if not isinstance(lst, (list, tuple)):
        # Error handling: yield an empty result for non-list input
        # instead of raising, and stop so the loop below never runs.
        yield []
        return
    for i in lst:
        if any([isinstance(i, list), isinstance(i, tuple)]):
            for j in flatten(i):
                yield j
        else:
            yield i
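
# Example usage of `flatten` (illustrative input). Note the list() call,
# since flatten is a generator:
#     list(flatten(['a', ['b', ['c', 'd']], ('e',)]))
#     => ['a', 'b', 'c', 'd', 'e']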


def remove_odd_sounding_words(words):
    """Remove random odd sounding word combinations via regular expressions.

    Args:
        words (list): The list of words

    Returns:
        list: An updated word list with words cleaned.
    """
    if words is None or len(words) == 0:
        return words
    odd_regexes = [
        # Words that start with a run of 3-6 vowels.
        re.compile(r'^[aeiouy]{3,6}'),
        # Odd consonant clusters: bk, ck, dk, gk, etc.,
        # plus zt, ksd, kd and zhr.
        re.compile(r'\b([^aeiouys]k|zt|ksd|kd|zhr)'),
    ]
    # Keep only the words that match none of the regexes above.
    return [word for word in words if not any(
        regex.match(word) for regex in odd_regexes)]
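
# Example (illustrative):
#     remove_odd_sounding_words(['bkart', 'ztop', 'melody'])
#     => ['melody']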


def stem_words(words):
    """Stem words to their base linguistic stem to remove redundancy.

    Args:
        words (list): The list of words

    Returns:
        list: An updated word list with words stemmed.
    """
    return [stem(word, stemmer=PORTER) for word in words]
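
# Example (illustrative; exact stems depend on pattern's Porter stemmer):
#     stem_words(['running', 'cats'])  =>  ['run', 'cat']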


def remove_stop_words(words):
    """Remove all English stop words.

    Args:
        words (list): The list of words

    Returns:
        list: An updated word list with stopwords removed.
    """
    # http://stackoverflow.com/questions/5486337/
    # how-to-remove-stop-words-using-nltk-or-python
    stop_words = set(stopwords.words('english'))
    return [word for word in words if word.lower() not in stop_words]
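
# Example (requires the NLTK stopwords corpus, e.g. nltk.download('stopwords')):
#     remove_stop_words(['the', 'quick', 'fox'])  =>  ['quick', 'fox']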


def remove_bad_words(words):
    """Remove naughty words that might come from wordnet synsets and lemmata.

    Args:
        words (list): The list of words

    Returns:
        list: An updated word list with bad words removed.
    """
    bad_words = ['nigger', 'wop', 'kike', 'faggot', 'fuck', 'pussy', 'cunt']
    return [word for word in words if word.lower() not in bad_words]


def filter_words(words):
    """Filter words by the min/max length settings in the settings module.

    Single words must fall within MIN_LENGTH..MAX_LENGTH; each chunk of a
    spaced phrase must fall within MIN_LENGTH..SPACED_MAX_LENGTH.

    Args:
        words (list): The list of words

    Returns:
        list: The filtered words
    """
    new_arr = []
    for word in words:
        if ' ' not in word:
            if (namebot_settings.MIN_LENGTH <= len(word)
                    <= namebot_settings.MAX_LENGTH):
                new_arr.append(word)
        else:
            # Keep only the chunks that fit the spaced bounds,
            # then rejoin them into a single phrase.
            split_join = [
                chunk for chunk in word.split(' ')
                if (namebot_settings.MIN_LENGTH <= len(chunk)
                    <= namebot_settings.SPACED_MAX_LENGTH)]
            new_arr.append(' '.join(split_join))
    return new_arr
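
# Example (illustrative; assumes, say, MIN_LENGTH = 4 and MAX_LENGTH = 14
# in the settings module):
#     filter_words(['ox', 'acme', 'supercalifragilistic'])  =>  ['acme']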


def uniquify(words):
    """Remove duplicates from a list.

    Args:
        words (list): The list of words

    Returns:
        list: An updated word list with duplicates removed.
    """
    if words is None:
        return words
    # dict keys are unique; note this does not preserve word order.
    return {}.fromkeys(words).keys()
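
# Example (illustrative; result order is not guaranteed):
#     uniquify(['spark', 'nova', 'spark'])  =>  ['spark', 'nova']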


def clean_sort(words):
    """Clean and prep words for the generation techniques.

    Args:
        words (list): The list of words

    Returns:
        list: An updated word list with words cleaned and sorted.
    """
    if isinstance(words, basestring):
        return words
    # Punctuation and digits to strip from each word.
    chars = '!"#$%\'()*+,._/:;<=>?@[\\]^`{|}~0123456789'
    if words is not None:
        try:
            words = [word.strip().lower().translate(
                string.maketrans('', ''),
                chars) for word in words if len(word) > 1]
        except TypeError:
            pass
    return words
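
# Example (illustrative; Python 2 string.translate semantics):
#     clean_sort([' Spark!! ', 'No.va', 'x'])  =>  ['spark', 'nova']
# Single-character entries are dropped.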


def chop_duplicate_ends(word):
    """Remove a duplicate letter on either end, if they are adjacent.

    Args:
        word (str): The word to chop.

    Returns:
        str: The word with duplicate end letters removed.
    """
    if len(word) > 1 and word[0] == word[1]:
        word = word[1:]
    if word[-2:-1] == word[-1:]:
        word = word[:-1]
    return word
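
# Example: chop_duplicate_ends('aardvarkk')  =>  'ardvark'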


def key_words_by_pos_tag(words):
    """Key words by the pos tag name, given when using pos_tag on a list.

    Args:
        words (list): The list of words, where each item is a 2-tuple
            of (word, pos tag).

    Returns:
        dict: A dictionary keyed by pos tag, with a list of the words
            matching each tag as the value.
    """
    alltags = defaultdict(list)
    for word, pos in words:
        alltags[pos].append(word)
    return alltags
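
# Example, with nltk.pos_tag-style 2-tuples (illustrative):
#     key_words_by_pos_tag([('dog', 'NN'), ('run', 'VB'), ('cat', 'NN')])
#     => {'NN': ['dog', 'cat'], 'VB': ['run']}  (a defaultdict)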