Completed
Push — master ( cc4906...fdc087 )
by Chris
01:28
created

make_portmanteau_split()   F

Complexity

Conditions 11

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 11
c 4
b 0
f 0
dl 0
loc 31
rs 3.1764

How to fix   Complexity   

Complexity

Complex functions like make_portmanteau_split() often do a lot of different things. To break such a function down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""Primary techniques for the core functionality of namebot."""
2
3
from __future__ import absolute_import
4
from __future__ import division
5
6
import re
7
from collections import defaultdict
8
from random import choice
9
from string import ascii_uppercase
10
11
import nltk
0 ignored issues
show
Configuration introduced by
The import nltk could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
12
13
from . import nlp
14
from . import normalization
15
from . import settings as namebot_settings
16
17
18
_prefixes = namebot_settings.PREFIXES
19
_suffixes = namebot_settings.SUFFIXES
20
_alphabet = namebot_settings.ALPHABET
21
_consonants = namebot_settings.CONSONANTS
22
_vowels = namebot_settings.VOWELS
23
_regexes = namebot_settings.regexes
24
25
26
def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    Args:
        words (list): List of words
        tld (str, optional): The TLD (top-level domain) to use.

    Returns:
        list: The modified list of words.

    >>> domainify(['radio'], tld='io')
    >>> ['rad.io']
    """
    _words = []
    if tld.startswith('.'):
        tld = tld.replace('.', '')
    for word in words:
        if word.endswith(tld) and tld != '':
            # Insert the dot before the *trailing* TLD only.  The previous
            # str.replace() call also mangled any earlier occurrence of the
            # TLD string inside the word (e.g. 'ionio' -> '.ion.io').
            word = '{}.{}'.format(word[:-len(tld)], tld)
        _words.append(word)
    return _words
47
48
49
def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    Swaps the first letters of each consecutive pair of words:
    [f]oo [b]ar => boo far.

    Args:
        words (list): The list of words to operate on.

    Returns:
        list: The updated list of words.

    Raises:
        ValueError: If fewer than two words are given.

    >>> spoonerism(['foo', 'bar'])
    >>> ['boo far']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    new_words = []
    # Stop one short of the end so `words[k + 1]` is always valid,
    # instead of relying on catching IndexError.
    for k in range(len(words) - 1):
        first, second = words[k], words[k + 1]
        if not first or not second:
            # Empty strings have no first letter to swap; skip the pair
            # (the original skipped these via the IndexError handler).
            continue
        new_words.append('{}{} {}{}'.format(
            second[0],    # 2nd word, 1st letter
            first[1:],    # 1st word, 2nd letter to end
            first[0],     # 1st word, 1st letter
            second[1:]))  # 2nd word, 2nd letter to end
    return new_words
75
76
77
def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    Swaps the middle letters of each consecutive pair of words:
    f[o]o b[a]r => fao bor.

    Args:
        words (list): The list of words to operate on.

    Returns:
        list: The updated list of words.

    Raises:
        ValueError: If fewer than two words are given.

    >>> kniferism(['foo', 'bar'])
    >>> ['fao bor']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    new_words = []
    # Bounded loop replaces the try/except IndexError of the original.
    for k in range(len(words) - 1):
        first, second = words[k], words[k + 1]
        if not first or not second:
            # No middle letter to swap in an empty string; skip the pair.
            continue
        mid_first = len(first) // 2
        mid_second = len(second) // 2
        new_words.append('{}{}{} {}{}{}'.format(
            first[:mid_first],
            second[mid_second],
            first[mid_first + 1:],
            second[:mid_second],
            first[mid_first],
            second[mid_second + 1:]))
    return new_words
107
108
109
def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    Swaps the last letters of each consecutive pair of words:
    fo[o] ba[r] => for bao.

    Args:
        words (list): The list of words to operate on.

    Returns:
        list: The updated list of words.

    Raises:
        ValueError: If fewer than two words are given.

    >>> forkerism(['foo', 'bar'])
    >>> ['for bao']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    new_words = []
    # Bounded loop replaces the try/except IndexError of the original.
    for k in range(len(words) - 1):
        first, second = words[k], words[k + 1]
        if not first or not second:
            # Empty strings have no last letter to swap; skip the pair.
            continue
        new_words.append('{}{} {}{}'.format(
            first[:-1],    # 1st word, 1st letter to last - 1
            second[-1],    # 2nd word, last letter
            second[:-1],   # 2nd word, 1st letter to last - 1
            first[-1]))    # 1st word, last letter
    return new_words
140
141
142
def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """Combine each word with a vowel-altered copy of itself.

    e.g ch[i]t-ch[a]t, d[i]lly, d[a]lly.
    See http://phrases.org.uk/meanings/reduplication.html.
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    # Either a random vowel, or the caller-specified one.
    replacement = choice(_vowels) if random else vowel
    new_words = []
    for original in words:
        altered = re.sub(r'a|e|i|o|u', replacement, original, count=count)
        # Skip no-op substitutions so we never emit "word word".
        if altered != original:
            new_words.append('{} {}'.format(original, altered))
    return new_words
158
159
160
def prefixify(words):
    """Apply a prefix technique to a set of words.

    Args:
        words (list): The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        # Depends only on the word — hoisted out of the prefix loop
        # (the original recomputed it for every prefix).
        word_starts_consonant = re.search(_regexes['no_vowels'], word[0])
        for prefix in _prefixes:
            prefix_starts_consonant = re.search(
                _regexes['no_vowels'], prefix[0])
            if word_starts_consonant or prefix_starts_consonant:
                # Join only when the seam has a vowel on at least one side:
                # the prefix ends in a vowel, or the word starts with one.
                # (Local names fixed — the original's `vowel_beginning`
                # actually checked the prefix's *end*.)
                prefix_ends_vowel = re.search(r'[aeiou]', prefix[-1:])
                word_starts_vowel = re.search(r'[aeiou]', word[:1])
                if prefix_ends_vowel or word_starts_vowel:
                    new_arr.append('{}{}'.format(prefix, word))
    return new_arr
187
188
189
def suffixify(words):
    """Apply a suffix technique to a set of words.

    Args:
        words (list): The list of words to operate on.
            (e.g -> chard + ard = chardard -> chard)

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            word_starts_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_starts_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if not (word_starts_vowel or suffix_starts_vowel):
                continue
            # Use == for string comparison — the original used `is`, which
            # tests object identity and is not guaranteed for equal strings.
            # The len(word) > 1 guard avoids the IndexError the original
            # raised on the single-letter word 'e'.
            if suffix == 'ify' and len(word) > 1 and word[-1] == 'e':
                if word[-2] != 'i':
                    new_arr.append('{}{}'.format(word[:-2], suffix))
                else:
                    new_arr.append('{}{}'.format(word[:-1], suffix))
            new_arr.append(word + suffix)
    return new_arr
217
218
219
def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    Args:
        words (list): The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # Skip when the rhyming pair would start with the same letter,
            # or when swapping the first letter reproduces the word itself.
            # (`!=` replaces the original's `is not` identity check, which
            # is unreliable for characters from arbitrary sources.)
            rhyme = '{}{}'.format(letter, word[1:])
            if word[0] != letter and rhyme != word:
                new_arr.append('{} {}'.format(word, rhyme))
    return new_arr
239
240
241
def disfixify(words, replaces=1):
    """Apply a disfix technique to a set of words.

    Disfixing is done by removing the first set of vowel-consonant pairs.

    Args:
        words (list): The list of words to operate on.
        replaces (int, optional): Number of replacements
            to make on this string.

    Returns:
        list: the updated *fixed words
    """
    # letter + vowel + consonant
    vc_combo = re.compile(r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}')
    results = []
    for word in words:
        # Only strip when more than one combo exists, so the word keeps
        # at least one vowel-consonant cluster.
        if len(vc_combo.findall(word)) > 1:
            results.append(vc_combo.sub('', word, replaces))
        else:
            results.append(word)
    return results
262
263
264
def infixify(words):
    """Apply a infix technique to a set of words.

    Adds all consonant+vowel pairs to all inner matching vowel+consonant pairs
    of a word, giving all combinations for each word.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    # letter + vowel + consonant + vowel + consonant
    vc_combo_pair = re.compile(
        r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}[aeiou]'
        '{1}[qwrtypsdfghjklzxcvbnm]{1}')
    for word in words:
        matches = re.findall(vc_combo_pair, word)
        if matches:
            for match in matches:
                for infix_pair in namebot_settings.CV_TL_PAIRS:
                    # Get midpoint of this string.
                    mid = len(match) // 2
                    # Get the left and right substrings to join with.
                    first, second = match[0:mid], match[mid:]
                    # Check if the infix_pair is the same as start, or end.
                    bad_matches = [
                        # Duplicates joined is bad.
                        infix_pair == first, infix_pair == second,
                        # Matching letters on start/end joining substrings
                        # is bad.
                        first[-1] == infix_pair[0],
                        # Matching letters on end/start joining substrings
                        # is also bad.
                        # NOTE(review): the comment suggests this was meant
                        # to involve `second` rather than `first` — confirm.
                        first[0] == infix_pair[-1],
                    ]
                    # Skip bad 'fusings'
                    if any(bad_matches):
                        continue
                    # Splice the infix pair into the middle of the match
                    # and substitute back into the original word.
                    replacer = '{}{}{}'.format(first, infix_pair, second)
                    new_arr.append(word.replace(match, replacer))
        else:
            # No inner pair to infix; keep the word unchanged.
            new_arr.append(word)
    return new_arr
308
309
310
def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
                                 If not specified, these will be generated
                                 randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
                             (if pairs is not specified.)

    Returns:
        results (list) - The simulfix version of each word,
                         for each simulfix pair.
    """
    # NOTE: `max` shadows the builtin, but renaming it would break
    # keyword callers, so the name is kept for backward compatibility.
    if pairs is None:
        pairs = ['{}{}'.format(choice(_vowels), choice(_consonants))
                 for _ in range(max)]
    results = []
    for word in words:
        midpoint = len(word) // 2
        for pair in pairs:
            # Inject the pair at the middle of the word.
            results.append(
                '{}{}{}'.format(word[:midpoint], pair, word[midpoint:]))
    return results
335
336
337
def palindrome(word):
    """Create a palindrome from a word.

    Args:
        word (str): The word.

    Returns:
        str: The word followed by its mirror image.

    >>> palindrome('cool')
    >>> 'coollooc'
    """
    return word + word[::-1]
350
351
352
def palindromes(words):
    """Convert a list of words into their palindromic form.

    Args:
        words (list): The words.

    Returns:
        list: The list of palindromes.

    >>> palindromes(['cool', 'neat'])
    >>> ['coollooc', 'neattaen']
    """
    results = []
    for word in words:
        results.append(palindrome(word))
    return results
365
366
367
def make_founder_product_name(founder1, founder2, product):
    """Get the name of two people forming a company and combine it.

    Combines the uppercased first initial of each founder's name
    with the product name.

    Args:
        founder1 (str): Your founder name 1.
        founder2 (str): Your founder name 2.
        product (str): Your product/feature/service name.

    Returns:
        str: The updated name.

    >>> make_founder_product_name('chris', 'ella', 'widgets')
    >>> 'C & E widgets'
    """
    # The previous docstring example claimed 'chris & ella widgets',
    # but the code has always used the uppercased initials only.
    return '{} & {} {}'.format(
        founder1[0].upper(),
        founder2[0].upper(),
        product)
385
386
387
def make_name_alliteration(words, divider=' '):
    """Make an alliteration with a set of words, if applicable.

    Examples:
    java jacket
    singing sally
    earth engines
    ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array
    """
    new_arr = []
    words = sorted(words)

    for word1 in words:
        for word2 in words:
            # Compare values with == / != — the original used `is`/`is not`,
            # which test object identity and are unreliable for strings
            # built at runtime.
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
408
409
410
def make_name_abbreviation(words):
    """Will make some kind of company acronym.

    eg: BASF, AT&T, A&W
    Returns a single string of the new word combined.
    """
    initials = (word[:1].upper() for word in words)
    return ''.join(initials)
417
418
419
def make_vowel(words, vowel_type, vowel_index):
    """Primary for all Portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as following:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.
    """
    new_arr = []
    for first in words:
        for second in words:
            # Identity check intentionally kept: the same list element
            # should never be fused with itself.
            if first is second:
                continue
            if not (re.search(vowel_type, first) and
                    re.search(vowel_type, second)):
                continue
            # Vowel positions; `or 1` bumps index 0 to 1 so the ratio
            # below never divides by zero (was `if pos is 0: pos = 1`).
            pos_first = first.index(vowel_index) or 1
            pos_second = second.index(vowel_index) or 1
            # Decide which word should be the prefix and which the suffix.
            if round(pos_first / len(first)) > round(pos_second / len(second)):
                prefix = first[0:pos_first + 1]
                suffix = second[pos_second:len(second)]
                if len(prefix) + len(suffix) > 2:
                    if (re.search(_regexes['all_vowels'], prefix) or
                            re.search(_regexes['all_vowels'], suffix)):
                        # Merge a duplicated seam letter (== replaces the
                        # original's `is` character-identity comparison).
                        if prefix[-1] == suffix[0]:
                            new_arr.append(prefix[:-1] + suffix)
                        else:
                            new_arr.append(prefix + suffix)
    return new_arr
468
469
470
def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    new_arr = []
    # One make_vowel pass per vowel, in a/e/i/o/u order.
    for vowel in 'aeiou':
        pattern = re.compile(r'{}{{1}}'.format(vowel))
        new_arr += make_vowel(words, pattern, vowel)
    return new_arr
494
495
496
def make_portmanteau_split(words):
    """Make a portmanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then the last V+C in the second word,
    then use all consonant splits of the first word as middles.
    """
    new_arr = []
    # [aeiou] / [^aeiou] replace the original's malformed classes
    # (e.g. [a|e|i|o|u{1}]), which also matched '|', '{', '1' and '}';
    # for alphabetic words the behavior is identical.
    first_cv = re.compile(r'[^aeiou]+[aeiou]')
    last_vc = re.compile(r'[aeiou]+[^aeiou]$')
    for i in words:
        for j in words:
            if i is j:
                continue
            head_match = first_cv.search(i)
            tail_match = last_vc.search(j)
            if not (i and head_match and tail_match):
                continue
            head = head_match.group(0)
            tail = tail_match.group(0)
            # Consonant chunks of the first word, used as middle splits.
            middles = re.split(r'[aeiou]', i)
            if middles:
                for middle in middles:
                    new_arr.append(head + middle + tail)
                # The original used for/else here; with no break the else
                # branch always ran, so these fixed consonant variants are
                # appended unconditionally after the loop.
                for consonant in 'tszx':
                    new_arr.append('{}{}{}'.format(head, consonant, tail))
    return new_arr
527
528
529
def make_punctuator(words, replace):
    """Put some hyphens or dots, or a given punctuation.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _replace(words, replace, replace_type='.'):
        """Insert :replace_type after every occurrence of :replace."""
        return [word.replace(
            replace, replace + replace_type) for word in words]

    # Produce both the hyphenated and the dotted variants.
    hyphens = _replace(words, replace, replace_type='-')
    periods = _replace(words, replace)
    return hyphens + periods
541
542
543
def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    results = []
    # One punctuator pass per vowel, in a/e/i/o/u order.
    for vowel in 'aeiou':
        results += make_punctuator(words, vowel)
    return results
552
553
554
def make_vowelify(words):
    """Chop off consonant ala nautica if second to last letter is a vowel."""
    results = []
    for word in words:
        # Look for a vowel anywhere before the last two letters.
        if re.search(_regexes['all_vowels'], word[:-2]):
            results.append(word[:-1])
    return results
561
562
563
def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results.

    Each word is run through every (find, replace) token pair below,
    producing one variant per pair; duplicates are removed at the end.
    """
    # (find, replace) substitution pairs applied via str.replace.
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # these seem to have
        # sucked in practice
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at end of word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    new_arr = []
    for word in words:
        for tokens in token_groups:
            # One misspelled variant per substitution pair.
            new_arr.append(word.replace(*tokens))
    # De-duplicate while delegating ordering to the normalization helper.
    return normalization.uniquify(new_arr)
634
635
636
def _pig_latinize(word, postfix='ay'):
637
    """Generate standard pig latin style, with optional postfix argument."""
638
    # Common postfixes: ['ay', 'yay', 'way']
639
    if not type(postfix) is str:
640
        raise TypeError('Must use a string for postfix.')
641
642
    piggified = None
643
644
    vowel_re = re.compile(r'(a|e|i|o|u)')
645
    first_letter = word[0:1]
646
647
    # clean up non letters
648
    word = word.replace(r'[^a-zA-Z]', '')
649
650
    if vowel_re.match(first_letter):
651
        piggified = word + 'way'
652
    else:
653
        piggified = ''.join([word[1: len(word)], first_letter, postfix])
654
    return piggified
655
656
657
def pig_latinize(words, postfix='ay'):
    """Pig latinize a set of words.

    Args:
        words (list): A list of words.
        postfix (str, optional): A postfix to use. Default is `ay`.

    Returns:
        list: The updated list.
    """
    results = []
    for word in words:
        results.append(_pig_latinize(word, postfix=postfix))
    return results
669
670
671
def acronym_lastname(description, lastname):
    """Create an acronym plus the last name.

    Inspiration: ALFA Romeo.
    """
    # Acronym of the description's significant (non stop-) words.
    significant = normalization.remove_stop_words(description.split(' '))
    acronym = ''.join([word[0].upper() for word in significant])
    return '{} {}'.format(acronym, lastname)
679
680
681
def get_descriptors(words):
    """Group words by their NLTK part-of-speech descriptors.

    Use NLTK to first grab tokens by looping through words,
    then tag part-of-speech (in isolation)
    and provide a dictionary with a list of each type
    for later retrieval and usage.
    """
    grouped = defaultdict(list)
    tagged = nltk.pos_tag(nltk.word_tokenize(' '.join(words)))
    # Bucket each token under its part-of-speech tag.
    for token, tag in tagged:
        grouped[tag].append(token)
    return grouped
696
697
698
def _add_pos_subtypes(nouns, verbs):
699
    """Combine alternating verbs and nouns into a new list.
700
701
    Args:
702
        nouns (list) - List of nouns, noun phrases, etc...
703
        verbs (list) - List of verbs, verb phrases, etc...
704
705
    Returns:
706
        words (list) - The newly combined list
707
    """
708
    words = []
709
    try:
710
        for noun in nouns:
711
            for verb in verbs:
712
                words.append('{} {}'.format(noun, verb))
713
                words.append('{} {}'.format(verb, noun))
714
    except KeyError:
0 ignored issues
show
Unused Code introduced by
This except handler seems to be unused and could be removed.

Except handlers which only contain pass and do not have an else clause can usually simply be removed:

try:
    raises_exception()
except:  # Could be removed
    pass
Loading history...
715
        pass
716
    return words
717
718
719
def _create_pos_subtypes(words):
    """Check part-of-speech tags for a noun-phrase, adding combinations if so.

    If it exists, add combinations with noun-phrase + verb-phrase,
    noun-phrase + verb, and noun-phrase + adverb,
    for each pos type that exists.

    Args:
        words (dict) - Words grouped by part-of-speech tag.

    Returns:
        new_words (list) - The newly combined list
    """
    new_words = []
    types = words.keys()
    if 'NNP' in types:
        # Pair the proper nouns with each verb/adverb group present.
        for pos in ('VBP', 'VB', 'RB'):
            if pos in types:
                new_words += _add_pos_subtypes(words['NNP'], words[pos])
    return new_words
742
743
744
def make_descriptors(words):
    """Make descriptor names.

    Based from a verb + noun, adjective + noun combination.
    Examples:
        -Pop Cap,
        -Big Fish,
        -Red Fin,
        -Cold Water (grill), etc...
    Combines VBP/VB/RB, with NN/NNS
    """
    combined = _create_pos_subtypes(words)
    # De-duplicate before returning.
    return list(set(combined))
756
757
758
def all_prefix_first_vowel(word, letters=None):
    """Find the first vowel in a word and prefix with consonants.

    Args:
        word (str) - the word to update
        letters (list, optional) - the letters to use for prefixing.
            Defaults to all uppercase ASCII letters.

    Returns:
        words (list) - All prefixed words

    """
    # Avoid a mutable default argument: build the default list per call.
    if letters is None:
        letters = list(ascii_uppercase)
    matches = re.search(r'[aeiouy]', word)
    if matches is None:
        # No vowel anywhere in the word; nothing to prefix against.
        return [word]
    words = []
    # Set membership for the consonant check below.
    vowels = {'A', 'E', 'I', 'O', 'U'}
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            # If beginning letter is a vowel, don't offset the index
            if first_match == 0:
                words.append('{}{}'.format(letter, word))
            else:
                words.append('{}{}'.format(letter, word[first_match:]))
    return words


def recycle(words, func, times=2):
    """Run a set of words applied to a function repeatedly.

    Each pass feeds the previous output back in as the new input.
    `words` must be a list, and `func` must return a list.
    """
    result = words
    # Iterative form of the repeated application; a non-positive `times`
    # returns the input untouched.
    for _ in range(times):
        result = func(result)
    return result


def backronym(acronym, theme, max_attempts=10):
    """Attempt to generate a backronym based on a given acronym and theme.

    Args:
        acronym (str): The starting acronym.
        theme (str): The seed word to base other words off of.
        max_attempts (int, optional): The number of attempts before failing.

    Returns:
        dict: The result dictionary. If a backronym was successfully generated,
            the `success` key will be True, otherwise False.
    """
    ret = {
        'acronym': '.'.join(list(acronym)).upper(),
        'backronym': '',
        'words': [],
        'success_ratio': 0.0,
        'success': False
    }
    if not acronym or not theme:
        return ret
    all_words = set()
    words = nlp._get_synset_words(theme)
    _backronym = []
    acronym = acronym.lower()
    # Maps acronym-letter index -> word; initialized here so it exists
    # even when the loop body never runs (e.g. max_attempts=0).
    sdict = {}
    cur_step = 0
    # Keep trying until the acronym is covered, but ALWAYS stop after
    # `max_attempts` passes. (The previous `or` condition looped forever
    # whenever the theme never produced words matching the acronym.)
    while len(_backronym) < len(acronym) and cur_step < max_attempts:
        all_words.update(words)
        # Add words if they contain the same first letter
        # as any in the given acronym.
        for word in words:
            if word[0].lower() in acronym:
                if '_' in word:
                    # Don't add multi-word strings, but don't leave it blank.
                    _backronym.append(word[0])
                else:
                    _backronym.append(word)
        # Key each found word by the position of its first letter in the
        # acronym, so the result can be ordered to spell the acronym.
        for word in _backronym:
            try:
                sdict[acronym.index(word[0].lower())] = word
            except (ValueError, IndexError):
                # str.index raises ValueError (not IndexError) on a miss;
                # IndexError guards against an empty word string.
                continue
        cur_step += 1
        # Refresh words for next attempt.
        words = nlp._get_synset_words(theme)
        # Try again if no words existed.
        if not words:
            continue
        # Get new theme, similar to originating theme.
        theme = words[0]
    # Order the words by their letter's position in the acronym
    # (a plain dict .values() view is insertion-ordered, not index-ordered).
    vals = [sdict[index] for index in sorted(sdict)]
    ret.update({
        'backronym': ' '.join(vals).upper(),
        'words': vals,
        'success_ratio': float(len(vals)) / float(len(acronym)),
        'success': len(vals) == len(acronym)
    })
    return ret


def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words"
    """
    techniques = data['words']
    for key in techniques:
        # Clean/sort first, then drop odd-sounding words, then de-dupe.
        cleaned = normalization.clean_sort(techniques[key])
        filtered = normalization.remove_odd_sounding_words(cleaned)
        techniques[key] = normalization.uniquify(filtered)
    return data


def generate_all_techniques(words):
    """Generate all techniques across the library in one place.

    Args:
        words (list) - The source words to generate names from.

    Returns:
        dict: The scrubbed results, keyed by technique name under 'words'.
    """
    data = {
        'words': {
            # NOTE: a duplicate 'alliterations' key was removed here; dict
            # literals silently keep only the last duplicate, so the first
            # call's result was discarded.
            'alliterations': make_name_alliteration(words),
            'portmanteau': make_portmanteau_default_vowel(words),
            'vowels': make_vowelify(words),
            'suffix': suffixify(words),
            'prefix': prefixify(words),
            'duplifix': duplifixify(words),
            'disfix': disfixify(words),
            'infix': infixify(words),
            'simulfix': simulfixify(words),
            'founder_product_name': make_founder_product_name(
                'Lindsey', 'Chris', 'Widgets'),
            'punctuator': make_punctuator_vowels(words),
            'name_abbreviation': make_name_abbreviation(words),
            'make_portmanteau_split': make_portmanteau_split(words),
            'forkerism': forkerism(words),
            'kniferism': kniferism(words),
            'spoonerism': spoonerism(words),
            'palindrome': palindromes(words),
            'reduplication_ablaut': reduplication_ablaut(words),
            'misspelling': make_misspelling(words),
            'descriptors': make_descriptors(
                get_descriptors(words))
        }
    }
    return super_scrub(data)
904