1
|
|
|
"""Primary techniques for the core functionality of namebot.""" |
2
|
|
|
|
3
|
|
|
from __future__ import absolute_import |
4
|
|
|
from __future__ import division |
5
|
|
|
|
6
|
|
|
from random import choice |
7
|
|
|
from string import ascii_uppercase |
8
|
|
|
from collections import defaultdict |
9
|
|
|
import re |
10
|
|
|
import nltk |
11
|
|
|
|
12
|
|
|
from . import settings as namebot_settings |
13
|
|
|
from . import normalization |
14
|
|
|
|
15
|
|
|
|
16
|
|
|
# Module-level shortcuts for the configurable settings used by the
# techniques below (prefix/suffix tables, alphabet, vowel/consonant
# sets and the shared compiled-regex map).
_prefixes = namebot_settings.PREFIXES
_suffixes = namebot_settings.SUFFIXES
_alphabet = namebot_settings.ALPHABET
_consonants = namebot_settings.CONSONANTS
_vowels = namebot_settings.VOWELS
_regexes = namebot_settings.regexes
22
|
|
|
|
23
|
|
|
|
24
|
|
|
class InsufficientWordsError(Exception):
    """Raised when too few words are supplied for a combination technique."""

    def __init__(self, msg):
        # Delegate to Exception.__init__ so str(exc) and exc.args carry
        # the message; the original stored it on `msg` only, leaving
        # str(exc) empty.
        super(InsufficientWordsError, self).__init__(msg)
        self.msg = msg
27
|
|
|
|
28
|
|
|
|
29
|
|
|
def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    Args:
        words (list): List of words.
        tld (str, optional): The TLD (top-level domain) to use.

    Returns:
        list: The modified list of words.
    """
    _words = []
    if tld.startswith('.'):
        tld = tld.replace('.', '')
    for word in words:
        if tld and word.endswith(tld):
            # Insert the dot before the *trailing* TLD only. The original
            # used str.replace, which rewrites the first occurrence
            # anywhere in the word (e.g. 'comcom' became '.comcom').
            word = '{}.{}'.format(word[:-len(tld)], tld)
        _words.append(word)
    return _words
47
|
|
|
|
48
|
|
|
|
49
|
|
|
def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    Swaps the first letters of each adjacent pair of words.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    Raises:
        InsufficientWordsError: if fewer than two words are given.

    >>> spoonerism(['foo', 'bar'])
    >>> ['boo far']
    """
    if len(words) < 2:
        raise InsufficientWordsError('Need more than one word to combine')
    results = []
    # First-letter swap: [f]oo [b]ar => boo far
    for pos, first in enumerate(words):
        try:
            second = words[pos + 1]
            results.append('{}{} {}{}'.format(
                second[0],   # 2nd word, 1st letter
                first[1:],   # 1st word, remainder
                first[0],    # 1st word, 1st letter
                second[1:])) # 2nd word, remainder
        except IndexError:
            # Last word has no pair (or a word was empty); skip it.
            continue
    return results
75
|
|
|
|
76
|
|
|
|
77
|
|
|
def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    Swaps the middle letters of each adjacent pair of words.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    Raises:
        InsufficientWordsError: if fewer than two words are given.

    >>> kniferism(['foo', 'bar'])
    >>> ['fao bor']
    """
    # Mid-letter swap: f[o]o b[a]r => fao bor
    if len(words) < 2:
        raise InsufficientWordsError('Need more than one word to combine')
    results = []
    for pos, first in enumerate(words):
        try:
            second = words[pos + 1]
            mid_first = len(first) // 2
            mid_second = len(second) // 2
            results.append('{}{}{} {}{}{}'.format(
                first[:mid_first],
                second[mid_second],
                first[mid_first + 1:],
                second[:mid_second],
                first[mid_first],
                second[mid_second + 1:]))
        except IndexError:
            # No next word, or a word too short to index; skip.
            continue
    return results
107
|
|
|
|
108
|
|
|
|
109
|
|
|
def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    Swaps the last letters of each adjacent pair of words.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    Raises:
        InsufficientWordsError: if fewer than two words are given.

    >>> forkerism(['foo', 'bar'])
    >>> ['for bao']
    """
    # Last-letter swap: fo[o] ba[r] => for bao
    if len(words) < 2:
        raise InsufficientWordsError('Need more than one word to combine')
    results = []
    for pos, first in enumerate(words):
        try:
            second = words[pos + 1]
            results.append('{}{} {}{}'.format(
                first[:-1],   # 1st word, all but last letter
                second[-1],   # 2nd word, last letter
                second[:-1],  # 2nd word, all but last letter
                first[-1]))   # 1st word, last letter
        except IndexError:
            # No next word, or an empty word; skip this pair.
            continue
    return results
140
|
|
|
|
141
|
|
|
|
142
|
|
|
def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """A technique to combine words and altering the vowels.

    e.g ch[i]t-ch[a]t, d[i]lly, d[a]lly.
    See http://phrases.org.uk/meanings/reduplication.html.

    Args:
        words (list) - The list of words to operate on.
        count (int, optional) - How many vowels to substitute per word.
        random (bool, optional) - Pick the replacement vowel at random.
        vowel (str, optional) - Replacement vowel when random is False.

    Raises:
        InsufficientWordsError: if fewer than two words are given.
    """
    if len(words) < 2:
        raise InsufficientWordsError('Need more than one word to combine')
    results = []
    replacement = choice(_vowels) if random else vowel
    for original in words:
        ablauted = re.sub(r'a|e|i|o|u', replacement, original, count=count)
        # Skip words where the substitution was a no-op.
        if ablauted != original:
            results.append('{} {}'.format(original, ablauted))
    return results
158
|
|
|
|
159
|
|
|
|
160
|
|
|
def prefixify(words):
    """Apply a prefix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    results = []
    for word in words:
        if not word:
            continue
        for prefix in _prefixes:
            # Require a consonant at the start of the word or the prefix.
            word_no_vowel = re.search(_regexes['no_vowels'], word[0])
            prefix_no_vowel = re.search(_regexes['no_vowels'], prefix[0])
            if not (word_no_vowel or prefix_no_vowel):
                continue
            # Then require a vowel at the end of the prefix, or at the
            # beginning of the word.
            prefix_ends_vowel = re.search(r'a|e|i|o|u', prefix[-1:])
            word_starts_vowel = re.search(r'^a|e|i|o|u', word[:1])
            if prefix_ends_vowel or word_starts_vowel:
                results.append('{}{}'.format(prefix, word))
    return results
185
|
|
|
|
186
|
|
|
|
187
|
|
|
def suffixify(words):
    """Apply a suffix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.
            (e.g -> chard + ard = chardard -> chard)

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            word_start_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_start_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if word_start_vowel or suffix_start_vowel:
                # Compare strings with `==`/`!=`, not `is`/`is not`:
                # identity checks on strings only worked by accident of
                # CPython interning.
                if suffix == 'ify':
                    if word[-1] == 'e':
                        if word[-2] != 'i':
                            # Drop a trailing 'e' pair before 'ify'.
                            new_arr.append('{}{}'.format(word[:-2], suffix))
                        else:
                            new_arr.append('{}{}'.format(word[:-1], suffix))
                    new_arr.append(word + suffix)
                else:
                    new_arr.append(word + suffix)
    return new_arr
215
|
|
|
|
216
|
|
|
|
217
|
|
|
def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # Check that the first letter is NOT the same as the
            # substituted letter, and that the combined word is not a
            # duplicate of the original. Uses `!=`, not `is not`:
            # string identity only worked via CPython interning.
            duplicate_word = '{}{}'.format(letter, word[1:]) == word
            if word[0] != letter and not duplicate_word:
                new_arr.append('{} {}{}'.format(word, letter, word[1:]))
    return new_arr
237
|
|
|
|
238
|
|
|
|
239
|
|
|
def disfixify(words):
    """Apply a disfix technique to a set of words.

    TODO: implement

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    # Placeholder: no disfix logic exists yet, so no words are produced.
    return []
252
|
|
|
|
253
|
|
|
|
254
|
|
|
def infixify(words):
    """Apply a disfix technique to a set of words.

    TODO: implement

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    # Placeholder: no infix logic exists yet, so no words are produced.
    return []
267
|
|
|
|
268
|
|
|
|
269
|
|
|
def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
                                 If not specified, these will be generated
                                 randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
                             (if pairs is not specified.)
                             NOTE: shadows the builtin `max`; kept for
                             interface compatibility.

    Returns:
        results (list) - The simulfix version of each word,
                         for each simulfix pair.
    """
    results = []
    if pairs is None:
        # Random vowel+consonant pairs, `max` of them.
        pairs = []
        for _ in range(max):
            pairs.append(choice(_vowels) + choice(_consonants))
    for word in words:
        midpoint = len(word) // 2
        for combo in pairs:
            results.append('{}{}{}'.format(
                word[:midpoint], combo, word[midpoint:]))
    return results
294
|
|
|
|
295
|
|
|
|
296
|
|
|
def palindrome(word):
    """Return the word fused with its own mirror image (e.g. 'cat' -> 'cattac')."""
    reversed_word = word[::-1]
    return word + reversed_word
298
|
|
|
|
299
|
|
|
|
300
|
|
|
def palindromes(words):
    """Return the palindrome of each word in `words`."""
    results = []
    for word in words:
        results.append(palindrome(word))
    return results
302
|
|
|
|
303
|
|
|
|
304
|
|
|
def make_founder_product_name(founder1, founder2, product):
    """Get the name of two people forming a company and combine it."""
    first_initial = founder1[0].upper()
    second_initial = founder2[0].upper()
    return '{} & {} {}'.format(first_initial, second_initial, product)
310
|
|
|
|
311
|
|
|
|
312
|
|
|
def make_name_alliteration(word_array, divider=' '):
    """Make an alliterative pairing of words sharing a first letter.

    e.g.
        java jacket
        singing sally
        earth engines
        ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array

    Args:
        word_array (list) - The list of words to operate on.
        divider (str, optional) - Separator placed between the pair.

    Returns:
        new_arr (list): the combined pairs.
    """
    new_arr = []
    word_array = sorted(word_array)
    for word1 in word_array:
        for word2 in word_array:
            # Compare values with `==`/`!=` -- the original used
            # `is`/`is not`, which only worked via string interning and
            # wrongly paired duplicate equal words with themselves.
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
332
|
|
|
|
333
|
|
|
|
334
|
|
|
def make_name_abbreviation(words):
    """Will make some kind of company acronym.

    eg: BASF, AT&T, A&W
    Returns a single string of the new word combined.
    """
    initials = [word[:1].upper() for word in words]
    return ''.join(initials)
341
|
|
|
|
342
|
|
|
|
343
|
|
|
def make_vowel(words, vowel_type, vowel_index):
    """Primary for all Portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as following:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.
    """
    new_arr = []
    for i in words:
        for j in words:
            is_match_i = re.search(vowel_type, i)
            is_match_j = re.search(vowel_type, j)
            # `!=` rather than `is not`: equal duplicate words must also
            # be skipped, not just the identical object.
            if i != j and is_match_i and is_match_j:
                # get the indices and lengths to use in finding the ratio
                pos_i = i.index(vowel_index)
                len_i = len(i)
                pos_j = j.index(vowel_index)
                len_j = len(j)

                # If starting index is 0,
                # add 1 to it so we're not dividing by zero
                # (`== 0`, not `is 0`: small-int identity is an
                # implementation detail).
                if pos_i == 0:
                    pos_i = 1
                if pos_j == 0:
                    pos_j = 1

                # Decide which word should be the
                # prefix and which should be suffix
                if round(pos_i / len_i) > round(pos_j / len_j):
                    p = i[0: pos_i + 1]
                    p2 = j[pos_j: len(j)]
                    if len(p) + len(p2) > 2:
                        if re.search(
                                _regexes['all_vowels'], p) or re.search(
                                    _regexes['all_vowels'], p2):
                            # Avoid doubling the shared vowel at the seam.
                            if p[-1] == p2[0]:
                                new_arr.append(p[:-1] + p2)
                            else:
                                new_arr.append(p + p2)
    return new_arr
392
|
|
|
|
393
|
|
|
|
394
|
|
|
def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    results = []
    # Same patterns as before: one single-vowel regex per vowel.
    for vowel in ('a', 'e', 'i', 'o', 'u'):
        pattern = re.compile(vowel + '{1}')
        results += make_vowel(words, pattern, vowel)
    return results
418
|
|
|
|
419
|
|
|
|
420
|
|
|
def make_portmanteau_split(words):
    """Make a portmeanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then the last V+C in the second word,
    then join them around each consonant run of the first word.
    """
    new_arr = []
    for i in words:
        for j in words:
            # `!=`, not `is not`: equal duplicate words must be skipped.
            if i != j:
                # The original character classes were written as
                # [a|e|i|o|u{1}], which wrongly matched '|', '{', '1'
                # and '}' as literal characters; [aeiou] is the intent.
                l1 = re.search(r'[^aeiou]+[aeiou]', i)
                l2 = re.search(r'[aeiou]+[^aeiou]$', j)
                if l1 and l2:
                    l1 = l1.group(0)
                    l2 = l2.group(0)
                    # Middle pieces: consonant runs from splitting the
                    # first word on its vowels.
                    middles = re.split(r'[aeiou]', i)
                    if middles:
                        for middle in middles:
                            new_arr.append(l1 + middle + l2)
                    else:
                        # Defensive fallback; re.split never returns an
                        # empty list, so this branch is unreachable in
                        # practice but kept from the original.
                        new_arr.append('{}{}{}'.format(l1, 't', l2))
                        new_arr.append('{}{}{}'.format(l1, 's', l2))
                        new_arr.append('{}{}{}'.format(l1, 'z', l2))
                        new_arr.append('{}{}{}'.format(l1, 'x', l2))
    return new_arr
451
|
|
|
|
452
|
|
|
|
453
|
|
|
def make_punctuator(words, replace):
    """Put some hyphens or dots, or a given punctutation.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _punctuate(symbol):
        # Insert `symbol` after every occurrence of `replace`.
        return [word.replace(replace, replace + symbol) for word in words]

    return _punctuate('-') + _punctuate('.')
465
|
|
|
|
466
|
|
|
|
467
|
|
|
def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    results = []
    for vowel in ('a', 'e', 'i', 'o', 'u'):
        results += make_punctuator(words, vowel)
    return results
476
|
|
|
|
477
|
|
|
|
478
|
|
|
def make_vowelify(words):
    """Chop off consonant ala nautica if second to last letter is a vowel."""
    results = []
    for word in words:
        # NOTE(review): the search actually scans all but the last two
        # characters for any vowel -- confirm against intent.
        if re.search(_regexes['all_vowels'], word[:-2]):
            results.append(word[:-1])
    return results
485
|
|
|
|
486
|
|
|
|
487
|
|
|
def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results.

    Each (old, new) token pair below is applied independently to a fresh
    copy of every word via str.replace, producing one candidate per
    pair; unchanged copies are collapsed by uniquify() at the end.
    """
    # Ordered replacement pairs. Order matters only for the final list
    # ordering, since each pair operates on the original word.
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # these seem to have
        # sucked in practice
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at end of word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    new_arr = []
    for word in words:
        for tokens in token_groups:
            # One substitution pass per pair, always from the original word.
            new_arr.append(word.replace(*tokens))
    return normalization.uniquify(new_arr)
558
|
|
|
|
559
|
|
|
|
560
|
|
|
def _pig_latinize(word, postfix='ay'): |
561
|
|
|
"""Generates standard pig latin style, |
562
|
|
|
with customizeable postfix argument""" |
563
|
|
|
# Common postfixes: ['ay', 'yay', 'way'] |
564
|
|
|
if not type(postfix) is str: |
565
|
|
|
raise TypeError('Must use a string for postfix.') |
566
|
|
|
|
567
|
|
|
piggified = None |
568
|
|
|
|
569
|
|
|
vowel_re = re.compile(r'(a|e|i|o|u)') |
570
|
|
|
first_letter = word[0:1] |
571
|
|
|
|
572
|
|
|
# clean up non letters |
573
|
|
|
word = word.replace(r'[^a-zA-Z]', '') |
574
|
|
|
|
575
|
|
|
if vowel_re.match(first_letter): |
576
|
|
|
piggified = word + 'way' |
577
|
|
|
else: |
578
|
|
|
piggified = ''.join([word[1: len(word)], first_letter, postfix]) |
579
|
|
|
return piggified |
580
|
|
|
|
581
|
|
|
|
582
|
|
|
def pig_latinize(words, postfix='ay'):
    """Pig-latinize each word in `words`, using `postfix` as the suffix."""
    latinized = []
    for word in words:
        latinized.append(_pig_latinize(word, postfix=postfix))
    return latinized
584
|
|
|
|
585
|
|
|
|
586
|
|
|
def acronym_lastname(description, lastname):
    """Inspiration: ALFA Romeo"""
    # Acronym of the description's significant words + the last name.
    significant = normalization.remove_stop_words(description.split(' '))
    initials = ''.join([token[0].upper() for token in significant])
    return '{} {}'.format(initials, lastname)
591
|
|
|
|
592
|
|
|
|
593
|
|
|
def get_descriptors(words):
    """Group words by their part-of-speech tag.

    Uses NLTK to tokenize the joined words, then part-of-speech tags
    them (in isolation), returning a dict mapping each POS tag to the
    list of words that carried it, for later retrieval and usage.
    """
    pos_groups = defaultdict(list)
    tagged = nltk.pos_tag(nltk.word_tokenize(' '.join(words)))
    # Push each word into the bucket for its tag.
    for token, tag in tagged:
        pos_groups[tag].append(token)
    return pos_groups
607
|
|
|
|
608
|
|
|
|
609
|
|
|
def _add_pos_subtypes(nouns, verbs): |
610
|
|
|
"""Combine alternating verbs and nouns into a new list. |
611
|
|
|
|
612
|
|
|
Args: |
613
|
|
|
nouns (list) - List of nouns, noun phrases, etc... |
614
|
|
|
verbs (list) - List of verbs, verb phrases, etc... |
615
|
|
|
|
616
|
|
|
Returns: |
617
|
|
|
words (list) - The newly combined list |
618
|
|
|
""" |
619
|
|
|
words = [] |
620
|
|
|
try: |
621
|
|
|
for noun in nouns: |
622
|
|
|
for verb in verbs: |
623
|
|
|
words.append('{} {}'.format(noun, verb)) |
624
|
|
|
words.append('{} {}'.format(verb, noun)) |
625
|
|
|
except KeyError: |
626
|
|
|
pass |
627
|
|
|
return words |
628
|
|
|
|
629
|
|
|
|
630
|
|
|
def _create_pos_subtypes(words):
    """Combine noun-phrases with verb/adverb groups when present.

    When the POS map contains 'NNP', combinations of words['NNP'] with
    each of the VBP, VB and RB groups (those that exist) are added.

    Args:
        words (dict) - Mapping of POS tag -> list of words.

    Returns:
        new_words (list) - The newly combined list
    """
    new_words = []
    available = words.keys()
    if 'NNP' in available:
        for tag in ('VBP', 'VB', 'RB'):
            if tag in available:
                new_words += _add_pos_subtypes(words['NNP'], words[tag])
    return new_words
651
|
|
|
|
652
|
|
|
|
653
|
|
|
def make_descriptors(words):
    """Make descriptor names.

    Based from a verb + noun, adjective + noun combination.
    Examples:
        -Pop Cap,
        -Big Fish,
        -Red Fin,
        -Cold Water (grill), etc...
    Combines VBP/VB/RB, with NN/NNS
    """
    combos = _create_pos_subtypes(words)
    # De-duplicate; ordering is not preserved.
    return list(set(combos))
665
|
|
|
|
666
|
|
|
|
667
|
|
|
def all_prefix_first_vowel(word, letters=None):
    """Find the first vowel in a word, remove all letters before it, and
    prefix the remainder with each consonant in `letters`.

    Args:
        word (str) - the word to update
        letters (list, optional) - the letters to use for prefixing;
            defaults to all uppercase ASCII letters.

    Returns:
        words (list) - All prefixed words
    """
    if letters is None:
        # Build the default per call: a mutable default argument would
        # share one list across all calls.
        letters = list(ascii_uppercase)
    re_vowels = re.compile(r'[aeiouy]')
    matches = re.search(re_vowels, word)
    if matches is None:
        # No vowel at all: return the word untouched.
        return [word]
    words = []
    vowels = frozenset(['A', 'E', 'I', 'O', 'U'])
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            # If beginning letter is a vowel, don't offset the index
            if first_match == 0:
                words.append('{}{}'.format(letter, word))
            else:
                words.append('{}{}'.format(letter, word[first_match:]))
    return words
694
|
|
|
|
695
|
|
|
|
696
|
|
|
def recycle(words, func, times=2):
    """Run a set of words through `func` repeatedly, `times` times,
    feeding each output back in as the next input.

    `words` must be a list, and `func` must return a list.
    """
    result = words
    for _ in range(times):
        result = func(result)
    return result
703
|
|
|
|
704
|
|
|
|
705
|
|
|
def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words"
    """
    for technique, candidates in data['words'].items():
        # Sort/clean, drop odd-sounding words, then de-duplicate.
        cleaned = normalization.clean_sort(candidates)
        cleaned = normalization.remove_odd_sounding_words(cleaned)
        data['words'][technique] = normalization.uniquify(cleaned)
    return data
716
|
|
|
|
717
|
|
|
|
718
|
|
|
def generate_all_techniques(words):
    """Generate all techniques across the library in one place.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        dict: The scrubbed {'words': {technique: [results]}} mapping.
    """
    data = {
        'words': {
            # NOTE: the original dict literal listed 'alliterations'
            # twice; duplicate dict keys silently collapse, so it is
            # declared once here.
            'alliterations': make_name_alliteration(words),
            'portmanteau': make_portmanteau_default_vowel(words),
            'vowels': make_vowelify(words),
            'suffix': suffixify(words),
            'prefix': prefixify(words),
            'duplifix': duplifixify(words),
            'disfix': disfixify(words),
            'infix': infixify(words),
            'simulfix': simulfixify(words),
            'founder_product_name': make_founder_product_name(
                'Lindsey', 'Chris', 'Widgets'),
            'punctuator': make_punctuator_vowels(words),
            'name_abbreviation': make_name_abbreviation(words),
            'make_portmanteau_split': make_portmanteau_split(words),
            'forkerism': forkerism(words),
            'kniferism': kniferism(words),
            'spoonerism': spoonerism(words),
            'palindrome': palindromes(words),
            'reduplication_ablaut': reduplication_ablaut(words),
            'misspelling': make_misspelling(words),
            'descriptors': make_descriptors(
                get_descriptors(words))
        }
    }
    return super_scrub(data)
748
|
|
|
|