make_vowel()   F
last analyzed

Complexity

Conditions 13

Size

Total Lines 49

Duplication

Lines 0
Ratio 0 %

Importance

Changes 5
Bugs 0 Features 0
Metric Value
cc 13
c 5
b 0
f 0
dl 0
loc 49
rs 2.5507

How to fix   Complexity   

Complexity

Complex functions like make_vowel() often do a lot of different things. To break such a function down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""Primary techniques for the core functionality of namebot."""
2
3
from __future__ import absolute_import
4
from __future__ import division
5
6
import re
7
from collections import defaultdict
8
from random import choice
9
from string import ascii_uppercase
10
11
import nltk
0 ignored issues
show
Configuration introduced by
The import nltk could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
12
13
from . import nlp
14
from . import normalization
15
from . import settings as namebot_settings
16
17
18
_prefixes = namebot_settings.PREFIXES
19
_suffixes = namebot_settings.SUFFIXES
20
_alphabet = namebot_settings.ALPHABET
21
_consonants = namebot_settings.CONSONANTS
22
_vowels = namebot_settings.VOWELS
23
_regexes = namebot_settings.regexes
24
25
26
def slice_ends(word, count=1):
    """Slice letters off each side, in a symmetric fashion.

    The idea is to find interesting substring word combinations.

    :param word (string): the word to modify.
    :param count (int, optional): The number of letters to chop off each end.
    :rtype string: The modified string.

    >>> slice_ends('potatoes', count=2)
    >>> 'tato'
    """
    # `not count` already covers 0 and None; the original additionally
    # tested `count is None` inside any(), which was redundant.
    if not count:
        return word
    return word[count:len(word) - count]
41
42
43
def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    :param words (list): List of words
    :param tld (str, optional): The TLD (top-level domain) to use.
    :rtype list: The modified list of words.

    >>> domainify(['radio'], tld='io')
    >>> ['rad.io']
    """
    results = []
    if tld.startswith('.'):
        tld = tld.replace('.', '')
    for word in words:
        if tld and word.endswith(tld):
            # Splice the dot in before the *trailing* tld only. The
            # original used str.replace, which rewrote the first
            # occurrence anywhere in the word (e.g. 'comcom' with
            # tld='com' became '.comcom' instead of 'com.com').
            word = '{}.{}'.format(word[:-len(tld)], tld)
        results.append(word)
    return results
61
62
63
def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    Swaps the first letters of each adjacent word pair:
    [f]oo [b]ar => "boo far".

    :param words (list) - The list of words to operate on
    :rtype words (list) - The updated list of words
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx, first in enumerate(words):
        try:
            second = words[idx + 1]
            results.append('{}{} {}{}'.format(
                second[0],   # 2nd word, 1st letter
                first[1:],   # 1st word, 2nd letter to end
                first[0],    # 1st word, 1st letter
                second[1:])) # 2nd word, 2nd letter to end
        except IndexError:
            # Last word has no successor; empty words also land here.
            continue
    return results
86
87
88
def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    Swaps the middle letters of each adjacent word pair:
    f[o]o b[a]r => "fao bor".

    :param words (list) - The list of words to operate on
    :rtype words (list) - The updated list of words
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx, first in enumerate(words):
        try:
            second = words[idx + 1]
            mid_first = len(first) // 2
            mid_second = len(second) // 2
            results.append('{}{}{} {}{}{}'.format(
                first[:mid_first], second[mid_second], first[mid_first + 1:],
                second[:mid_second], first[mid_first], second[mid_second + 1:]))
        except IndexError:
            # Last word has no successor; empty words also land here.
            continue
    return results
115
116
117
def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    Swaps the last letters of each adjacent word pair:
    fo[o] ba[r] => "for bao".

    :param words (list) - The list of words to operate on
    :rtype words (list) - The updated list of words
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx, first in enumerate(words):
        try:
            second = words[idx + 1]
            results.append('{}{} {}{}'.format(
                first[:-1],   # 1st word, 1st letter to last - 1
                second[-1],   # 2nd word, last letter
                second[:-1],  # 2nd word, 1st letter to last - 1
                first[-1]))   # 1st word, last letter
        except IndexError:
            # Last word has no successor; empty words also land here.
            continue
    return results
145
146
147
def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """A technique to combine words and altering the vowels.

    See http://phrases.org.uk/meanings/reduplication.html for origination.

    :param words (list): The list of words to operate on.
    :param count (int, optional): The number of regex substitutions to make.
    :param random (bool, optional): Whether or not to randomize vowel choices.
        When True (the default) :vowel is ignored.
    :param vowel (string, optional): Which vowel to substitue.
                                     If no vowel is available the word
                                     will not change.

    >>> reduplication_ablaut(['cat', 'dog'], vowel='a')
    >>> ['dog', 'dag']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    replacement = choice(_vowels) if random else vowel
    results = []
    for word in words:
        mutated = re.sub(r'a|e|i|o|u', replacement, word, count=count)
        # Skip words the substitution left unchanged.
        if mutated != word:
            results.append('{} {}'.format(word, mutated))
    return results
172
173
174
def prefixify(words):
    """Apply a prefix technique to a set of words.

    :param words (list) - The list of words to operate on.
    :rtype new_arr (list): the updated *fixed words
    """
    results = []
    for word in words:
        if not word:
            continue
        for prefix in _prefixes:
            word_starts_non_vowel = re.search(_regexes['no_vowels'], word[0])
            prefix_starts_non_vowel = re.search(_regexes['no_vowels'], prefix[0])
            if not (word_starts_non_vowel or prefix_starts_non_vowel):
                continue
            # Join only when there's a vowel at the end of the prefix
            # or at the beginning of the word.
            vowel_ends_prefix = re.search(r'a|e|i|o|u', prefix[-1:])
            vowel_starts_word = re.search(r'^a|e|i|o|u', word[:1])
            if vowel_ends_prefix or vowel_starts_word:
                results.append('{}{}'.format(prefix, word))
    return results
198
199
200
def suffixify(words):
    """Apply a suffix technique to a set of words.

    :param words (list) - The list of words to operate on.
    :rtype new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            word_start_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_start_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if not (word_start_vowel or suffix_start_vowel):
                continue
            # Equality instead of the original `is`/`is not` identity
            # comparisons, which only worked via CPython string interning.
            if suffix == 'ify':
                if word[-1] == 'e':
                    if word[-2] != 'i':
                        new_arr.append('{}{}'.format(word[:-2], suffix))
                    else:
                        new_arr.append('{}{}'.format(word[:-1], suffix))
                # NOTE(review): the plain concatenation below also runs for
                # 'ify' + trailing 'e' words, duplicating output -- preserved
                # as-is; confirm whether that double-append is intended.
                new_arr.append(word + suffix)
            else:
                new_arr.append(word + suffix)
    return new_arr
224
225
226
def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    :param words (list) - The list of words to operate on.
    :rtype new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # Check if the first letter is NOT the same as the second letter,
            # or the combined word is not a duplicate of the first.
            # Uses equality instead of the original `is not`, which compared
            # string identity and only worked via CPython interning.
            duplicate_word = '{}{}'.format(letter, word[1:]) == word
            if word[0] != letter and not duplicate_word:
                new_arr.append('{} {}{}'.format(word, letter, word[1:]))
    return new_arr
243
244
245
def disfixify(words, replaces=1):
    """Apply a disfix technique to a set of words.

    Disfixing is done by removing the first set of vowel-consonant pairs.

    Args:
        words (list) - The list of words to operate on.
        replaces (int, optional): Number of replacements
            to make on this string.

    Returns:
        new_arr (list): the updated *fixed words
    """
    # letter + exactly one vowel + exactly one consonant
    pattern = r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}'
    results = []
    for word in words:
        # Only strip when more than one pair exists, so something is left.
        if len(re.findall(pattern, word)) > 1:
            results.append(re.sub(pattern, '', word, replaces))
        else:
            results.append(word)
    return results
266
267
268
def infixify(words):
    """Apply a infix technique to a set of words.

    Adds all consonant+vowel pairs to all inner matching vowel+consonant pairs
    of a word, giving all combinations for each word.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    # Matches letter + vowel + consonant + vowel + consonant runs,
    # e.g. 'banan' inside 'bananas'.
    vc_combo_pair = re.compile(
        r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}[aeiou]'
        '{1}[qwrtypsdfghjklzxcvbnm]{1}')
    for word in words:
        matches = re.findall(vc_combo_pair, word)
        if matches:
            for match in matches:
                # CV_TL_PAIRS: candidate infixes from settings -- presumably
                # consonant+vowel two-letter pairs; confirm in settings.
                for infix_pair in namebot_settings.CV_TL_PAIRS:
                    # Get midpoint of this string.
                    mid = len(match) // 2
                    # Get the left and right substrings to join with.
                    first, second = match[0:mid], match[mid:]
                    # Check if the infix_pair is the same as start, or end.
                    bad_matches = [
                        # Duplicates joined is bad.
                        infix_pair == first, infix_pair == second,
                        # Matching letters on start/end joining substrings
                        # is bad.
                        first[-1] == infix_pair[0],
                        # Matching letters on end/start joining substrings
                        # is also bad.
                        first[0] == infix_pair[-1],
                    ]
                    # Skip bad 'fusings'
                    if any(bad_matches):
                        continue
                    replacer = '{}{}{}'.format(first, infix_pair, second)
                    new_arr.append(word.replace(match, replacer))
        else:
            # No inner pair found; keep the word untouched.
            new_arr.append(word)
    return new_arr
312
313
314
def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
                                 If not specified, these will be generated
                                 randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
                             (if pairs is not specified.) NOTE: shadows the
                             builtin name, but is kept for keyword-argument
                             compatibility with existing callers.

    Returns:
        results (list) - The simulfix version of each word,
                         for each simulfix pair.
    """
    if pairs is None:
        pairs = ['{}{}'.format(choice(_vowels), choice(_consonants))
                 for _ in range(max)]
    results = []
    for word in words:
        midpoint = len(word) // 2
        for combo in pairs:
            # Insert the pair at the middle of the word.
            results.append('{}{}{}'.format(
                word[:midpoint], combo, word[midpoint:]))
    return results
339
340
341
def palindrome(word):
    """Create a palindrome from a word.

    Args:
        word (str): The word.

    Returns:
        str: The updated palindrome.

    >>> palindrome('cool')
    >>> 'coollooc'
    """
    mirrored = word[::-1]
    return word + mirrored
354
355
356
def palindromes(words):
    """Convert a list of words into their palindromic form.

    Args:
        words (list): The words.

    Returns:
        list: The list of palindromes.

    >>> palindromes(['cool', 'neat'])
    >>> ['coollooc', 'neattaen']
    """
    results = []
    for word in words:
        results.append(palindrome(word))
    return results
369
370
371
def make_founder_product_name(founder1, founder2, product):
    """Get the name of two people forming a company and combine it.

    Only the first letter of each founder name is used, uppercased.
    (The original docstring example showed full names, which the code
    never produced.)

    Args:
        founder1 (str): Your founder name 1.
        founder2 (str): Your founder name 2.
        product (str): Your product/feature/service name.

    Returns:
        str: The updated name.

    >>> make_founder_product_name('chris', 'ella', 'widgets')
    >>> 'C & E widgets'
    """
    return '{} & {} {}'.format(
        founder1[0].upper(),
        founder2[0].upper(),
        product)
389
390
391
def make_name_alliteration(words, divider=' '):
    """Make an alliteration with a set of words, if applicable.

    Examples:
    java jacket
    singing sally
    earth engines
    ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array

    :param words (list): The words to combine.
    :param divider (str, optional): The joining string.
    :rtype new_arr (list): All alliterative pairs.
    """
    new_arr = []
    words = sorted(words)
    for word1 in words:
        for word2 in words:
            # Equality instead of the original `is`/`is not` identity
            # comparisons, which relied on CPython string interning.
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
412
413
414
def make_name_abbreviation(words):
    """Will make some kind of company acronym.

    eg: BASF, AT&T, A&W
    Returns a single string of the new word combined.
    """
    initials = [word[:1].upper() for word in words]
    return ''.join(initials)
421
422
423
def make_vowel(words, vowel_type, vowel_index):
    """Primary for all Portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as following:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.

    :param words (list): Words to combine pairwise.
    :param vowel_type: Compiled regex matching the vowel in question.
    :param vowel_index (str): The vowel character itself.
    :rtype new_arr (list): The fused word combinations.
    """
    new_arr = []
    for i in words:
        for j in words:
            # Identity check skips pairing an element with itself.
            if i is j:
                continue
            if not (re.search(vowel_type, i) and re.search(vowel_type, j)):
                continue
            # Get the indices and lengths to use in finding the ratio.
            pos_i = i.index(vowel_index)
            len_i = len(i)
            pos_j = j.index(vowel_index)
            len_j = len(j)
            # If starting index is 0, add 1 to it
            # so we're not dividing by zero.
            # (== instead of the original `is 0` identity comparison.)
            if pos_i == 0:
                pos_i = 1
            if pos_j == 0:
                pos_j = 1
            # Decide which word should be the
            # prefix and which should be suffix.
            if round(pos_i / len_i) > round(pos_j / len_j):
                prefix = i[0:pos_i + 1]
                suffix = j[pos_j:len(j)]
                if len(prefix) + len(suffix) <= 2:
                    continue
                has_vowel = (re.search(_regexes['all_vowels'], prefix) or
                             re.search(_regexes['all_vowels'], suffix))
                if not has_vowel:
                    continue
                # Collapse the duplicated joining vowel if both sides
                # share it (== instead of the original `is`).
                if prefix[-1] == suffix[0]:
                    new_arr.append(prefix[:-1] + suffix)
                else:
                    new_arr.append(prefix + suffix)
    return new_arr
472
473
474
def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    results = []
    # One pass per vowel, in a/e/i/o/u order.
    for vowel in ('a', 'e', 'i', 'o', 'u'):
        results += make_vowel(words, re.compile(vowel + '{1}'), vowel)
    return results
498
499
500
def make_portmanteau_split(words):
    """Make a portmeanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then the last V+C in the second word,
    then join them around each vowel-split fragment of the first word,
    plus a few fixed consonant joiners (t, s, z, x).

    :param words (list): The words to combine pairwise.
    :rtype new_arr (list): The combined results.
    """
    new_arr = []
    for i in words:
        for j in words:
            # NOTE(review): identity check keeps an element from pairing
            # with itself; equal-but-distinct duplicates still pair.
            if i is j:
                continue
            l1 = re.search(r'[^a|e|i|o|u{1}]+[a|e|i|o|u{1}]', i)
            l2 = re.search(r'[a|e|i|o|u{1}]+[^a|e|i|o|u{1}]$', j)
            if not (i and l1 and l2):
                continue
            # Fragments of the first word split on vowels, used as
            # consonant middles.
            l3 = re.split(r'[a|e|i|o|u{1}]', i)
            l1 = l1.group(0)
            l2 = l2.group(0)
            if l3 and len(l3) > 0:
                for v in l3:
                    new_arr.append(l1 + v + l2)
                # The original used a for/else here; since the loop has
                # no `break`, the else-branch always ran, so these fixed
                # joiners are simply appended after the loop.
                new_arr.append('{}{}{}'.format(l1, 't', l2))
                new_arr.append('{}{}{}'.format(l1, 's', l2))
                new_arr.append('{}{}{}'.format(l1, 'z', l2))
                new_arr.append('{}{}{}'.format(l1, 'x', l2))
    return new_arr
531
532
533
def make_punctuator(words, replace):
    """Put some hyphens or dots, or a given punctutation.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _punctuate(mark):
        """Insert `mark` after every occurrence of `replace` in each word."""
        return [word.replace(replace, replace + mark) for word in words]

    # Hyphenated variants first, then dotted variants.
    return _punctuate('-') + _punctuate('.')
545
546
547
def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    results = []
    # One make_punctuator pass per vowel, in a/e/i/o/u order.
    for vowel in 'aeiou':
        results += make_punctuator(words, vowel)
    return results
556
557
558
def make_vowelify(words):
    """Chop off consonant ala nautica if second to last letter is a vowel."""
    # Keep all but the last letter when a vowel appears before the
    # final two characters.
    return [word[:-1] for word in words
            if re.search(_regexes['all_vowels'], word[:-2])]
565
566
567
def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results."""
    # (old, new) substitution pairs; each is applied independently to
    # every word via str.replace, so one output word per pair per input.
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # these seem to have
        # sucked in practice
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at end of word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    new_arr = []
    for word in words:
        for tokens in token_groups:
            # Words without the `old` substring pass through unchanged,
            # producing duplicates that uniquify() removes below.
            new_arr.append(word.replace(*tokens))
    return normalization.uniquify(new_arr)
638
639
640
def _pig_latinize(word, postfix='ay'):
641
    """Generate standard pig latin style, with optional postfix argument."""
642
    # Common postfixes: ['ay', 'yay', 'way']
643
    if not type(postfix) is str:
644
        raise TypeError('Must use a string for postfix.')
645
646
    piggified = None
647
648
    vowel_re = re.compile(r'(a|e|i|o|u)')
649
    first_letter = word[0:1]
650
651
    # clean up non letters
652
    word = word.replace(r'[^a-zA-Z]', '')
653
654
    if vowel_re.match(first_letter):
655
        piggified = word + 'way'
656
    else:
657
        piggified = ''.join([word[1: len(word)], first_letter, postfix])
658
    return piggified
659
660
661
def pig_latinize(words, postfix='ay'):
    """Pig latinize a set of words.

    Args:
        words (list): A list of words.
        postfix (str, optional): A postfix to use. Default is `ay`.

    Returns:
        words (list): The updated list.

    """
    results = []
    for word in words:
        results.append(_pig_latinize(word, postfix=postfix))
    return results
673
674
675
def acronym_lastname(description, lastname):
    """Create an acronym plus the last name.

    Inspiration: ALFA Romeo.
    """
    # Acronym from the description, minus stop words.
    significant = normalization.remove_stop_words(description.split(' '))
    acronym = ''.join([word[0].upper() for word in significant])
    return '{} {}'.format(acronym, lastname)
683
684
685
def get_descriptors(words):
    """Group words by their NLTK part-of-speech descriptors.

    Use NLTK to first grab tokens by looping through words,
    then tag part-of-speech (in isolation)
    and provide a dictionary with a list of each type
    for later retrieval and usage.
    """
    descriptors = defaultdict(list)
    tagged = nltk.pos_tag(nltk.word_tokenize(' '.join(words)))
    # Push each token into the bucket for its part-of-speech tag.
    for token, tag in tagged:
        descriptors[tag].append(token)
    return descriptors
700
701
702
def _add_pos_subtypes(nouns, verbs):
703
    """Combine alternating verbs and nouns into a new list.
704
705
    Args:
706
        nouns (list) - List of nouns, noun phrases, etc...
707
        verbs (list) - List of verbs, verb phrases, etc...
708
709
    Returns:
710
        words (list) - The newly combined list
711
    """
712
    words = []
713
    try:
714
        for noun in nouns:
715
            for verb in verbs:
716
                words.append('{} {}'.format(noun, verb))
717
                words.append('{} {}'.format(verb, noun))
718
    except KeyError:
0 ignored issues
show
Unused Code introduced by
This except handler seems to be unused and could be removed.

Except handlers which only contain pass and do not have an else clause can usually simply be removed:

try:
    raises_exception()
except:  # Could be removed
    pass
Loading history...
719
        pass
720
    return words
721
722
723
def _create_pos_subtypes(words):
    """Check part-of-speech tags for a noun-phrase, adding combinations if so.

    If it exists, add combinations with noun-phrase + verb-phrase,
    noun-phrase + verb, and noun-phrase + adverb,
    for each pos type that exists.

    :param words (list) - List of verbs, verb phrases, etc...
    :rtype new_words (list) - The newly combined list
    """
    new_words = []
    types = words.keys()
    if 'NNP' not in types:
        return new_words
    # Pair the noun-phrase bucket with each verb/adverb bucket present.
    for pos in ('VBP', 'VB', 'RB'):
        if pos in types:
            new_words += _add_pos_subtypes(words['NNP'], words[pos])
    return new_words
743
744
745
def make_descriptors(words):
    """Make descriptor names.

    Based from a verb + noun, adjective + noun combination.
    Examples:
        -Pop Cap,
        -Big Fish,
        -Red Fin,
        -Cold Water (grill), etc...
    Combines VBP/VB/RB, with NN/NNS
    """
    combos = _create_pos_subtypes(words)
    # De-duplicate; ordering of the result is not guaranteed.
    return list(set(combos))
757
758
759
def all_prefix_first_vowel(word, letters=None):
    """Find the first vowel in a word and prefixes with consonants.

    :param word (str) - the word to update
    :param letters (list, optional) - the letters to use for prefixing;
                                      defaults to the uppercase alphabet.
    :rtype words (list) - All prefixed words
    """
    # Avoid a mutable default argument (shared across calls); build the
    # default alphabet fresh on each invocation instead.
    if letters is None:
        letters = list(ascii_uppercase)
    matches = re.search(r'[aeiouy]', word)
    if matches is None:
        # No vowel at all: nothing to prefix against.
        return [word]
    words = []
    vowels = frozenset('AEIOU')
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            if first_match == 0:
                # Word already begins with a vowel; prefix the whole word.
                words.append('{}{}'.format(letter, word))
            else:
                # Drop the leading consonant cluster before prefixing.
                words.append('{}{}'.format(letter, word[first_match:]))
    return words
781
782
783
def recycle(words, func, times=2):
    """Run a set of words applied to a function repeatedly.

    It will re-run with the last output as the new input.
    `words` must be a list, and `func` must return a list.

    :param words (list): The list of words.
    :param func (function): A function to recycle.
                            This function must take a single argument,
                            a list of strings.
    :param times (int, optional): The number of times to call the function.
    """
    # Iterative form of the original recursion; `range` yields nothing
    # for times <= 0, so the input passes through unchanged in that case.
    result = words
    for _ in range(times):
        result = func(result)
    return result
798
799
800
def backronym(acronym, theme, max_attempts=10):
    """Attempt to generate a backronym based on a given acronym and theme.

    :param acronym (str): The starting acronym.
    :param theme (str): The seed word to base other words off of.
    :param max_attempts (int, optional): The number of attempts before failing.
    :rtype dict: The result dictionary. If a backronym was successfully
                 generated, the `success` key will be True, otherwise False.
    """
    ret = {
        'acronym': '.'.join(list(acronym)).upper(),
        'backronym': '',
        'words': [],
        'success_ratio': 0.0,
        'success': False
    }
    if not acronym or not theme:
        return ret
    all_words = set()
    words = nlp._get_synset_words(theme)
    _backronym = []
    acronym = acronym.lower()
    # Map of acronym-letter index -> candidate word; initialized here so
    # it is always bound even if the loop body never runs.
    sdict = {}
    cur_step = 0
    # Keep trying while the backronym is incomplete AND attempts remain.
    # The original used `or`, which loops forever when the theme never
    # yields enough matching words.
    while len(_backronym) < len(acronym) and cur_step < max_attempts:
        all_words.update(words)
        # Add words if they contain the same first letter
        # as any in the given acronym.
        for word in words:
            if word[0].lower() in acronym:
                if '_' in word:
                    # Don't add multi-word strings, but don't leave it blank.
                    _backronym.append(word[0])
                else:
                    _backronym.append(word)
        sdict = {}
        # Sort the word in order of the acronyms
        # letters by re-arranging indices.
        for word in _backronym:
            try:
                index = acronym.index(word[0].lower())
                sdict[index] = word
            except ValueError:
                # str.index raises ValueError (not IndexError) on a miss;
                # the original caught the wrong exception type.
                continue
        cur_step += 1
        # Refresh words for next attempt.
        words = nlp._get_synset_words(theme)
        # Try again if no words existed.
        if not words:
            continue
        # Get new theme, similar to originating theme.
        theme = words[0]
    # Emit the words ordered by their position in the acronym; the
    # original used sdict.values() directly, which follows insertion
    # order rather than the acronym's letter order.
    vals = [sdict[key] for key in sorted(sdict)]
    ret.update({
        'backronym': ' '.join(vals).upper(),
        'words': vals,
        'success_ratio': float(len(vals)) / float(len(acronym)),
        'success': len(vals) == len(acronym)
    })
    return ret
859
860
861
def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words"
    """
    # Clean, filter, then de-duplicate each technique's word list in place.
    for technique, wordlist in data['words'].items():
        cleaned = normalization.clean_sort(wordlist)
        filtered = normalization.remove_odd_sounding_words(cleaned)
        data['words'][technique] = normalization.uniquify(filtered)
    return data
872
873
874
def generate_all_techniques(words):
    """Generate all techniques across the library in one place."""
    # Assemble (label, result) pairs for every technique, then scrub.
    techniques = [
        ('alliterations', make_name_alliteration(words)),
        ('portmanteau', make_portmanteau_default_vowel(words)),
        ('vowels', make_vowelify(words)),
        ('suffix', suffixify(words)),
        ('prefix', prefixify(words)),
        ('duplifix', duplifixify(words)),
        ('disfix', disfixify(words)),
        ('infix', infixify(words)),
        ('simulfix', simulfixify(words)),
        ('founder_product_name', make_founder_product_name(
            'Lindsey', 'Chris', 'Widgets')),
        ('punctuator', make_punctuator_vowels(words)),
        ('name_abbreviation', make_name_abbreviation(words)),
        ('make_portmanteau_split', make_portmanteau_split(words)),
        ('forkerism', forkerism(words)),
        ('kniferism', kniferism(words)),
        ('spoonerism', spoonerism(words)),
        ('palindrome', palindromes(words)),
        ('reduplication_ablaut', reduplication_ablaut(words)),
        ('misspelling', make_misspelling(words)),
        ('descriptors', make_descriptors(get_descriptors(words))),
    ]
    return super_scrub({'words': dict(techniques)})
903