"""Helpers to normalize inputs and text."""

import re
import string
from collections import defaultdict

from nltk.corpus import stopwords

from pattern.vector import PORTER
from pattern.vector import stem

import settings as namebot_settings

_regexes = namebot_settings.regexes


def flatten(lst):
    """Flatten a list with arbitrary levels of nesting.

    CREDIT: http://stackoverflow.com/questions/10823877/
    what-is-the-fastest-way-to-flatten-arbitrarily-nested-lists-in-python
    Changes made include:
    1. Adding error handling,
    2. Renaming variables,
    3. Using `any` instead of `or`.
    See http://creativecommons.org/licenses/by-sa/3.0/ for specific details.

    Args:
        lst (list): The nested list.

    Returns:
        (generator): The new flattened list of words.

    """
    if not isinstance(lst, (list, tuple)):
        # Not a sequence: yield an empty result and stop, rather than
        # raising a TypeError in the loop below.
        yield []
        return
    for i in lst:
        if any([isinstance(i, list), isinstance(i, tuple)]):
            for j in flatten(i):
                yield j
        else:
            yield i
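
# Illustrative usage (not part of the original module): `flatten` returns a
# generator, so wrap it in `list()` to materialize the result, e.g.
#   list(flatten(['foo', ['bar', ('baz', 'qux')]]))
# would be expected to yield ['foo', 'bar', 'baz', 'qux'].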


def remove_odd_sounding_words(words):
    """Remove random odd sounding word combinations via regular expressions.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with words cleaned.

    """
    odd_regexes = [
        # Words starting with a run of 3-6 vowels.
        re.compile(r'^[aeiouy]{3,6}'),
        # bk, ck, dk, gk, etc...
        re.compile(r'\b[^aeiouys]k|zt|ksd|kd|zhr'),
        re.compile(r'\bzt|ksd|kd|zhr')
    ]
    if words is None or len(words) == 0:
        return words
    # Keep only the words that match none of the regexes above.
    return [word for word in words
            if not any(re.match(regex, word) for regex in odd_regexes)]


def stem_words(words):
    """Stem words to their base linguistic stem to remove redundancy.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with words stemmed.

    """
    return [stem(word, stemmer=PORTER) for word in words]
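
# Illustrative example (results come from pattern's Porter stemmer):
#   stem_words(['running', 'connection'])
# would be expected to return roughly ['run', 'connect'] -- Porter stems are
# linguistic stems, not always dictionary words.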


def remove_stop_words(words):
    """Remove all stop words.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with stopwords removed.

    """
    # http://stackoverflow.com/questions/5486337/
    # how-to-remove-stop-words-using-nltk-or-python
    stop_words = set(stopwords.words('english'))
    return [w for w in words if w.lower() not in stop_words]


def remove_bad_words(words):
    """Remove naughty words that might come from wordnet synsets and lemmata.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with bad words removed.

    """
    bad_words = ["nigger", "wop",
                 "kike", "faggot",
                 "fuck", "pussy", "cunt"]
    return [word for word in words if word.lower() not in bad_words]


def filter_words(words):
    """Filter words by default min/max settings in the settings module.

    Args:
        words (list): The list of words.

    Returns:
        list: The filtered words.

    """
    new_arr = []
    for word in words:
        if not re.search(' ', word):
            lte = len(word) <= namebot_settings.MAX_LENGTH
            gte = len(word) >= namebot_settings.MIN_LENGTH
            if all([lte, gte]):
                new_arr.append(word)
        elif re.search(' ', word):
            split = re.split(' ', word)
            split_join = []
            for chunks in split:
                length = len(chunks)
                lte = length <= namebot_settings.SPACED_MAX_LENGTH
                gte = length >= namebot_settings.MIN_LENGTH
                if all([lte, gte]):
                    split_join.append(chunks)
            new_arr.append(
                ' '.join(split_join))
    return new_arr
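
# Illustrative behavior (the actual bounds come from the settings module):
# with hypothetical MIN_LENGTH=4 and MAX_LENGTH=12,
#   filter_words(['abc', 'moonlight', 'supercalifragilistic'])
# would return ['moonlight'], since only words (or space-separated chunks)
# within those length bounds are kept.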


def uniquify(words):
    """Remove duplicates from a list.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with duplicates removed.

    """
    # dict.fromkeys drops duplicates; note that ordering is not guaranteed.
    return {}.fromkeys(words).keys() if words is not None else words


def clean_sort(words):
    """A function for cleaning and prepping words for techniques.

    Args:
        words (list): The list of words.

    Returns:
        list: An updated word list with words cleaned and sorted.

    """
    if isinstance(words, basestring):
        return words
    chars = '!"#$%\'()*+,._/:;<=>?@[\\]^`{|}~01234567890'
    if words is not None:
        try:
            # Python 2 string API: `str.translate(table, deletechars)`
            # strips punctuation and digits from each word.
            words = [word.strip().lower().translate(
                string.maketrans('', ''),
                chars) for word in words if len(word) > 1]
        except TypeError:
            pass
    return words


def chop_duplicate_ends(word):
    """Remove duplicate letters on either end, if they are adjacent.

    Args:
        word (str): The word to chop.

    Returns:
        str: The word with any duplicate end letters removed.

    """
    # Guard against empty and single-character words.
    if len(word) > 1 and word[0] == word[1]:
        word = word[1:]
    if word[-2:-1] == word[-1:]:
        word = word[:-1]
    return word


def key_words_by_pos_tag(words):
    """Key words by the pos tag name, given when using pos_tag on a list.

    Args:
        words (list): The list of words, where each item is a 2-tuple
            of (word, pos tag).

    Returns:
        dict: A dictionary keyed by pos tag, with values as a list of the
            words matching that tag.

    """
    alltags = defaultdict(list)
    for word, pos in words:
        alltags[pos].append(word)
    return alltags
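

# A minimal, illustrative smoke test of the helpers above -- not part of the
# original module. It assumes the nltk stopwords corpus is downloaded and
# that `settings` defines MIN_LENGTH, MAX_LENGTH and SPACED_MAX_LENGTH.
if __name__ == '__main__':
    sample = ['Runners', 'running', 'the', 'moonlight', 'moonlight', 'bk']
    words = clean_sort(sample)          # lowercase, strip punctuation/digits
    words = remove_stop_words(words)    # drops 'the'
    words = remove_odd_sounding_words(words)  # drops 'bk'
    words = uniquify(words)             # drops the duplicate 'moonlight'
    print(filter_words(words))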