1
|
|
|
"""Primary techniques for the core functionality of namebot.""" |
2
|
|
|
|
3
|
|
|
from __future__ import absolute_import |
4
|
|
|
from __future__ import division |
5
|
|
|
|
6
|
|
|
import re |
7
|
|
|
from collections import defaultdict |
8
|
|
|
from random import choice |
9
|
|
|
from string import ascii_uppercase |
10
|
|
|
|
11
|
|
|
import nltk |
|
|
|
|
12
|
|
|
|
13
|
|
|
from . import nlp |
14
|
|
|
from . import normalization |
15
|
|
|
from . import settings as namebot_settings |
16
|
|
|
|
17
|
|
|
|
18
|
|
|
# Short module-level aliases for heavily-used values from settings.
_prefixes = namebot_settings.PREFIXES
_suffixes = namebot_settings.SUFFIXES
_alphabet = namebot_settings.ALPHABET
_consonants = namebot_settings.CONSONANTS
_vowels = namebot_settings.VOWELS
# Pre-built regex table (keys such as 'all_vowels' and 'no_vowels'
# are used below) -- defined in namebot settings.
_regexes = namebot_settings.regexes
24
|
|
|
|
25
|
|
|
|
26
|
|
|
def slice_ends(word, count=1):
    """Slice letters off each side, in a symmetric fashion.

    The idea is to find interesting substring word combinations.

    :param word (string): the word to modify.
    :param count (int, optional): The number of letters to chop off each end.
    :rtype string: The modified string.

    >>> slice_ends('potatoes', count=2)
    >>> 'tato'
    """
    # A zero/None/falsy count leaves the word untouched.
    if not count:
        return word
    return word[count:len(word) - count]
41
|
|
|
|
42
|
|
|
|
43
|
|
|
def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    Fix: the original used ``str.replace``, which rewrote *every*
    occurrence of the TLD substring in the word (e.g. 'iodineio' with
    tld 'io' became '.iodine.io'); only the trailing occurrence should
    be punctuated.

    :param words (list): List of words
    :param tld (str, optional): The TLD (top-level domain) to use.
    :rtype list: The modified list of words.

    >>> domainify(['radio'], tld='io')
    >>> ['rad.io']
    """
    _words = []
    if tld.startswith('.'):
        tld = tld.replace('.', '')
    for word in words:
        if tld and word.endswith(tld):
            # Split the TLD off the end only, then re-join with a dot.
            word = '{}.{}'.format(word[:-len(tld)], tld)
        _words.append(word)
    return _words
61
|
|
|
|
62
|
|
|
|
63
|
|
|
def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    First letters of consecutive word pairs are swapped:
    "First: [f]oo [b]ar => boo far"

    :param words (list) - The list of words to operate on
    :rtype words (list) - The updated list of words

    >>> spoonerism(['foo', 'bar'])
    >>> ['boo far']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx in range(len(words) - 1):
        first, second = words[idx], words[idx + 1]
        # Empty strings have no first letter to swap; skip them
        # (the original skipped via IndexError).
        if not first or not second:
            continue
        results.append('{}{} {}{}'.format(
            second[0], first[1:], first[0], second[1:]))
    return results
86
|
|
|
|
87
|
|
|
|
88
|
|
|
def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    Middle letters of consecutive word pairs are swapped:
    "Mid: f[o]o b[a]r => fao bor"

    :param words (list) - The list of words to operate on
    :rtype words (list) - The updated list of words

    >>> kniferism(['foo', 'bar'])
    >>> ['fao bor']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx in range(len(words) - 1):
        first, second = words[idx], words[idx + 1]
        # Empty strings have no middle letter; skip them
        # (the original skipped via IndexError).
        if not first or not second:
            continue
        mid_first = len(first) // 2
        mid_second = len(second) // 2
        results.append('{}{}{} {}{}{}'.format(
            first[:mid_first], second[mid_second], first[mid_first + 1:],
            second[:mid_second], first[mid_first], second[mid_second + 1:]))
    return results
115
|
|
|
|
116
|
|
|
|
117
|
|
|
def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    Last letters of consecutive word pairs are swapped:
    "Last: fo[o] ba[r] => for bao"

    :param words (list) - The list of words to operate on
    :rtype words (list) - The updated list of words

    >>> forkerism(['foo', 'bar'])
    >>> ['for bao']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx in range(len(words) - 1):
        first, second = words[idx], words[idx + 1]
        # Empty strings have no last letter; skip them
        # (the original skipped via IndexError).
        if not first or not second:
            continue
        results.append('{}{} {}{}'.format(
            first[:-1], second[-1], second[:-1], first[-1]))
    return results
145
|
|
|
|
146
|
|
|
|
147
|
|
|
def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """A technique to combine words and altering the vowels.

    See http://phrases.org.uk/meanings/reduplication.html for origination.

    :param words (list): The list of words to operate on.
    :param count (int, optional): The number of regex substitutions to make.
    :param random (bool, optional): Whether or not to randomize vowel choices;
        when True, `vowel` is ignored.
    :param vowel (string, optional): Which vowel to substitute.
        If no vowel is available the word will not change.

    >>> reduplication_ablaut(['cat', 'dog'], random=False, vowel='a')
    >>> ['dog dag']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    replacement = vowel if not random else choice(_vowels)
    pairs = []
    for original in words:
        mutated = re.sub(r'a|e|i|o|u', replacement, original, count=count)
        # Skip no-op substitutions -- only keep genuinely altered words.
        if mutated != original:
            pairs.append('{} {}'.format(original, mutated))
    return pairs
172
|
|
|
|
173
|
|
|
|
174
|
|
|
def prefixify(words):
    """Apply a prefix technique to a set of words.

    Every word is paired with every prefix from
    ``namebot_settings.PREFIXES``; a pairing is kept only when the
    consonant/vowel heuristics below consider the junction usable.

    :param words (list) - The list of words to operate on.
    :rtype new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for prefix in _prefixes:
            # NOTE(review): despite their names, both checks test the FIRST
            # character (of the word, then of the prefix) against the
            # settings 'no_vowels' pattern -- presumably "is a consonant";
            # confirm against namebot settings.
            first_prefix_no_vowel = re.search(
                _regexes['no_vowels'], word[0])
            second_prefix_no_vowel = re.search(
                _regexes['no_vowels'], prefix[0])
            if first_prefix_no_vowel or second_prefix_no_vowel:
                # if there's a vowel at the end of
                # prefix but not at the beginning
                # of the word (or vice versa)
                vowel_beginning = re.search(r'a|e|i|o|u', prefix[-1:])
                vowel_end = re.search(r'^a|e|i|o|u', word[:1])
                if vowel_beginning or vowel_end:
                    new_arr.append('{}{}'.format(prefix, word))
    return new_arr
198
|
|
|
|
199
|
|
|
|
200
|
|
|
def suffixify(words):
    """Apply a suffix technique to a set of words.

    Fix: string comparisons previously used ``is``/``is not``, which test
    object identity and only work through CPython's string-interning
    accident; they now use ``==``/``!=``.

    :param words (list) - The list of words to operate on.
    :rtype new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            # Keep the pairing only if either side starts with a vowel.
            word_start_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_start_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if word_start_vowel or suffix_start_vowel:
                if suffix == 'ify':
                    if word[-1] == 'e':
                        if word[-2] != 'i':
                            # Drop the final two letters before 'ify'.
                            new_arr.append('{}{}'.format(word[:-2], suffix))
                        else:
                            # Drop only the trailing 'e'.
                            new_arr.append('{}{}'.format(word[:-1], suffix))
                    # Always also keep the plain concatenation variant.
                    new_arr.append(word + suffix)
                else:
                    new_arr.append(word + suffix)
    return new_arr
224
|
|
|
|
225
|
|
|
|
226
|
|
|
def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    Fix: single-character comparison previously used ``is not``, which
    tests identity and only works because CPython caches 1-char strings;
    it now uses ``!=``.

    :param words (list) - The list of words to operate on.
    :rtype new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # check if the first letter is NOT the same as the second letter,
            # or the combined word is not a duplicate of the first.
            duplicate_word = '{}{}'.format(letter, word[1:]) == word
            if word[0] != letter and not duplicate_word:
                new_arr.append('{} {}{}'.format(word, letter, word[1:]))
    return new_arr
243
|
|
|
|
244
|
|
|
|
245
|
|
|
def disfixify(words, replaces=1):
    """Apply a disfix technique to a set of words.

    Disfixing is done by removing the first letter+vowel+consonant
    triplet of a word -- but only when more than one such triplet
    exists, so something is always left behind.

    Args:
        words (list) - The list of words to operate on.
        replaces (int, optional): Number of replacements
                                  to make on this string.

    Returns:
        new_arr (list): the updated *fixed words
    """
    triplet = r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}'
    results = []
    for word in words:
        if len(re.findall(triplet, word)) > 1:
            results.append(re.sub(triplet, '', word, replaces))
        else:
            results.append(word)
    return results
266
|
|
|
|
267
|
|
|
|
268
|
|
|
def infixify(words):
    """Apply a infix technique to a set of words.

    Adds all consonant+vowel pairs to all inner matching vowel+consonant pairs
    of a word, giving all combinations for each word.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    # Anchor pattern: letter + vowel + consonant + vowel + consonant;
    # each match is an insertion point for an infix.
    vc_combo_pair = re.compile(
        r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}[aeiou]'
        '{1}[qwrtypsdfghjklzxcvbnm]{1}')
    for word in words:
        matches = re.findall(vc_combo_pair, word)
        if matches:
            for match in matches:
                # CV_TL_PAIRS comes from settings -- presumably
                # consonant+vowel pairs to insert; confirm there.
                for infix_pair in namebot_settings.CV_TL_PAIRS:
                    # Get midpoint of this string.
                    mid = len(match) // 2
                    # Get the left and right substrings to join with.
                    first, second = match[0:mid], match[mid:]
                    # Check if the infix_pair is the same as start, or end.
                    bad_matches = [
                        # Duplicates joined is bad.
                        infix_pair == first, infix_pair == second,
                        # Matching letters on start/end joining substrings
                        # is bad.
                        first[-1] == infix_pair[0],
                        # Matching letters on end/start joining substrings
                        # is also bad.
                        first[0] == infix_pair[-1],
                    ]
                    # Skip bad 'fusings'
                    if any(bad_matches):
                        continue
                    replacer = '{}{}{}'.format(first, infix_pair, second)
                    new_arr.append(word.replace(match, replacer))
        else:
            # No insertion anchor: keep the word as-is.
            new_arr.append(word)
    return new_arr
312
|
|
|
|
313
|
|
|
|
314
|
|
|
def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
                                 If not specified, these will be generated
                                 randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
                             (if pairs is not specified.)

    Returns:
        results (list) - The simulfix version of each word,
                         for each simulfix pair.
    """
    if pairs is None:
        # Random vowel+consonant combos, e.g. 'ax' or 'en'.
        pairs = ['{}{}'.format(choice(_vowels), choice(_consonants))
                 for _ in range(max)]
    combined = []
    for word in words:
        midpoint = len(word) // 2
        for pair in pairs:
            # Wedge the pair into the middle of the word.
            combined.append(
                '{}{}{}'.format(word[:midpoint], pair, word[midpoint:]))
    return combined
339
|
|
|
|
340
|
|
|
|
341
|
|
|
def palindrome(word):
    """Create a palindrome from a word.

    Args:
        word (str): The word.

    Returns:
        str: The updated palindrome.

    >>> palindrome('cool')
    >>> 'coollooc'
    """
    mirrored = word[::-1]
    return word + mirrored


def palindromes(words):
    """Convert a list of words into their palindromic form.

    Args:
        words (list): The words.

    Returns:
        list: The list of palindromes.

    >>> palindromes(['cool', 'neat'])
    >>> ['coollooc', 'neattaen']
    """
    return [palindrome(term) for term in words]
369
|
|
|
|
370
|
|
|
|
371
|
|
|
def make_founder_product_name(founder1, founder2, product):
    """Get the name of two people forming a company and combine it.

    Only the first letter of each founder name is used, uppercased.

    Args:
        founder1 (str): Your founder name 1.
        founder2 (str): Your founder name 2.
        product (str): Your product/feature/service name.

    Returns:
        str: The updated name.

    >>> make_founder_product_name('chris', 'ella', 'widgets')
    >>> 'C & E widgets'
    """
    initial1 = founder1[0].upper()
    initial2 = founder2[0].upper()
    return '{} & {} {}'.format(initial1, initial2, product)
389
|
|
|
|
390
|
|
|
|
391
|
|
|
def make_name_alliteration(words, divider=' '):
    """Make an alliteration with a set of words, if applicable.

    Examples:
        java jacket
        singing sally
        earth engines
        ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array

    Fix: the original compared strings with ``is``/``is not`` (object
    identity); that only worked via CPython's interning of short strings.
    Equality operators are used instead.
    """
    new_arr = []
    words = sorted(words)

    for word1 in words:
        for word2 in words:
            # Same starting letter, but never pair a word with itself.
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
412
|
|
|
|
413
|
|
|
|
414
|
|
|
def make_name_abbreviation(words):
    """Will make some kind of company acronym.

    eg: BASF, AT&T, A&W
    Returns a single string of the new word combined.
    """
    initials = (word[:1].upper() for word in words)
    return ''.join(initials)
421
|
|
|
|
422
|
|
|
|
423
|
|
|
def make_vowel(words, vowel_type, vowel_index):
    """Primary for all Portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as following:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.

    Fix: comparisons previously used ``is``/``is not`` on strings and ints
    (identity, not equality, working only via CPython caching); replaced
    with ``==``/``!=``.
    """
    new_arr = []
    for i in words:
        for j in words:
            is_match_i = re.search(vowel_type, i)
            is_match_j = re.search(vowel_type, j)
            if i != j and is_match_i and is_match_j:
                # get the indices and lengths to use in finding the ratio
                pos_i = i.index(vowel_index)
                len_i = len(i)
                pos_j = j.index(vowel_index)
                len_j = len(j)

                # If starting index is 0,
                # add 1 to it so we're not dividing by zero
                if pos_i == 0:
                    pos_i = 1
                if pos_j == 0:
                    pos_j = 1

                # Decide which word should be the
                # prefix and which should be suffix
                if round(pos_i / len_i) > round(pos_j / len_j):
                    p = i[0: pos_i + 1]
                    p2 = j[pos_j: len(j)]
                    if len(p) + len(p2) > 2:
                        if re.search(
                                _regexes['all_vowels'], p) or re.search(
                                    _regexes['all_vowels'], p2):
                            # Merge a shared boundary letter rather
                            # than doubling it.
                            if p[-1] == p2[0]:
                                new_arr.append(p[:-1] + p2)
                            else:
                                new_arr.append(p + p2)
    return new_arr
472
|
|
|
|
473
|
|
|
|
474
|
|
|
def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    combos = []
    for vowel in ('a', 'e', 'i', 'o', 'u'):
        pattern = re.compile(r'{}{{1}}'.format(vowel))
        combos += make_vowel(words, pattern, vowel)
    return combos
498
|
|
|
|
499
|
|
|
|
500
|
|
|
def make_portmanteau_split(words):
    """Make a portmeanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then last V+C in the second word,
    then all consonant splits of the first word in between.

    Fixes: word comparison now uses ``==`` instead of identity
    (``is not``), and the vowel character classes no longer contain the
    stray ``|``, ``{``, ``1`` and ``}`` characters that
    ``[a|e|i|o|u{1}]`` accidentally included.
    """
    new_arr = []
    for i in words:
        for j in words:
            if i == j:
                continue
            l1 = re.search(r'[^aeiou]+[aeiou]', i)
            l2 = re.search(r'[aeiou]+[^aeiou]$', j)
            if l1 and l2:
                # Third letter used for
                # consonant middle splits only
                l3 = re.split(r'[aeiou]', i)
                l1 = l1.group(0)
                l2 = l2.group(0)
                if l3:
                    for v in l3:
                        new_arr.append(l1 + v + l2)
                else:
                    # No split available: glue with generic consonants.
                    for mid_consonant in ('t', 's', 'z', 'x'):
                        new_arr.append('{}{}{}'.format(l1, mid_consonant, l2))
    return new_arr
531
|
|
|
|
532
|
|
|
|
533
|
|
|
def make_punctuator(words, replace):
    """Put some hyphens or dots, or a given punctutation.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _punctuate(mark):
        # Insert the mark immediately after every occurrence of `replace`.
        return [word.replace(replace, replace + mark) for word in words]

    return _punctuate('-') + _punctuate('.')


def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    combined = []
    for vowel in 'aeiou':
        combined += make_punctuator(words, vowel)
    return combined
556
|
|
|
|
557
|
|
|
|
558
|
|
|
def make_vowelify(words):
    """Chop off consonant ala nautica if second to last letter is a vowel."""
    trimmed = []
    for word in words:
        # Keep words whose stem (everything but the last two letters)
        # still contains a vowel, dropping their final letter.
        if re.search(_regexes['all_vowels'], word[:-2]):
            trimmed.append(word[:-1])
    return trimmed
565
|
|
|
|
566
|
|
|
|
567
|
|
|
def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results."""
    # (find, replace) substitution pairs; each pair is applied to
    # every word independently.
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # these seem to have
        # sucked in practice
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at end of word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    misspelled = [word.replace(*tokens)
                  for word in words
                  for tokens in token_groups]
    return normalization.uniquify(misspelled)
638
|
|
|
|
639
|
|
|
|
640
|
|
|
def _pig_latinize(word, postfix='ay'): |
641
|
|
|
"""Generate standard pig latin style, with optional postfix argument.""" |
642
|
|
|
# Common postfixes: ['ay', 'yay', 'way'] |
643
|
|
|
if not type(postfix) is str: |
644
|
|
|
raise TypeError('Must use a string for postfix.') |
645
|
|
|
|
646
|
|
|
piggified = None |
647
|
|
|
|
648
|
|
|
vowel_re = re.compile(r'(a|e|i|o|u)') |
649
|
|
|
first_letter = word[0:1] |
650
|
|
|
|
651
|
|
|
# clean up non letters |
652
|
|
|
word = word.replace(r'[^a-zA-Z]', '') |
653
|
|
|
|
654
|
|
|
if vowel_re.match(first_letter): |
655
|
|
|
piggified = word + 'way' |
656
|
|
|
else: |
657
|
|
|
piggified = ''.join([word[1: len(word)], first_letter, postfix]) |
658
|
|
|
return piggified |
659
|
|
|
|
660
|
|
|
|
661
|
|
|
def pig_latinize(words, postfix='ay'):
    """Pig latinize a set of words.

    Args:
        words (list): A list of words.
        postfix (str, optional): A postfix to use. Default is `ay`.

    Returns:
        words (list): The updated list.
    """
    latinized = []
    for word in words:
        latinized.append(_pig_latinize(word, postfix=postfix))
    return latinized
673
|
|
|
|
674
|
|
|
|
675
|
|
|
def acronym_lastname(description, lastname):
    """Create an acronym plus the last name.

    Inspiration: ALFA Romeo.
    """
    significant = normalization.remove_stop_words(description.split(' '))
    acronym = ''.join(word[0].upper() for word in significant)
    return '{} {}'.format(acronym, lastname)
683
|
|
|
|
684
|
|
|
|
685
|
|
|
def get_descriptors(words):
    """Group words by their NLTK part-of-speech descriptors.

    Use NLTK to first grab tokens by looping through words,
    then tag part-of-speech (in isolation)
    and provide a dictionary with a list of each type
    for later retrieval and usage.
    """
    grouped = defaultdict(list)
    tagged = nltk.pos_tag(nltk.word_tokenize(' '.join(words)))
    # Then, push the word into the matching type
    for token, tag in tagged:
        grouped[tag].append(token)
    return grouped
700
|
|
|
|
701
|
|
|
|
702
|
|
|
def _add_pos_subtypes(nouns, verbs): |
703
|
|
|
"""Combine alternating verbs and nouns into a new list. |
704
|
|
|
|
705
|
|
|
Args: |
706
|
|
|
nouns (list) - List of nouns, noun phrases, etc... |
707
|
|
|
verbs (list) - List of verbs, verb phrases, etc... |
708
|
|
|
|
709
|
|
|
Returns: |
710
|
|
|
words (list) - The newly combined list |
711
|
|
|
""" |
712
|
|
|
words = [] |
713
|
|
|
try: |
714
|
|
|
for noun in nouns: |
715
|
|
|
for verb in verbs: |
716
|
|
|
words.append('{} {}'.format(noun, verb)) |
717
|
|
|
words.append('{} {}'.format(verb, noun)) |
718
|
|
|
except KeyError: |
|
|
|
|
719
|
|
|
pass |
720
|
|
|
return words |
721
|
|
|
|
722
|
|
|
|
723
|
|
|
def _create_pos_subtypes(words):
    """Check part-of-speech tags for a noun-phrase, adding combinations if so.

    If it exists, add combinations with noun-phrase + verb-phrase,
    noun-phrase + verb, and noun-phrase + adverb,
    for each pos type that exists.

    :param words (list) - List of verbs, verb phrases, etc...
    :rtype new_words (list) - The newly combined list
    """
    combos = []
    tags = words.keys()
    if 'NNP' in tags:
        # Pair proper nouns with each verb/adverb group that is present.
        for pos in ('VBP', 'VB', 'RB'):
            if pos in tags:
                combos += _add_pos_subtypes(words['NNP'], words[pos])
    return combos
743
|
|
|
|
744
|
|
|
|
745
|
|
|
def make_descriptors(words):
    """Make descriptor names.

    Based from a verb + noun, adjective + noun combination.
    Examples:
        -Pop Cap,
        -Big Fish,
        -Red Fin,
        -Cold Water (grill), etc...
    Combines VBP/VB/RB, with NN/NNS

    Note: results are de-duplicated via set(), so output order is
    not guaranteed.
    """
    unique = set(_create_pos_subtypes(words))
    return list(unique)
757
|
|
|
|
758
|
|
|
|
759
|
|
|
def all_prefix_first_vowel(word, letters=None):
    """Find the first vowel in a word and prefixes with consonants.

    Fix: the `letters` default was a mutable list built once at import
    time (``list(ascii_uppercase)``); the None-sentinel idiom is used
    instead, which is backward-compatible for all callers.

    :param word (str) - the word to update
    :param letters (list) - the letters to use for prefixing;
        defaults to all ASCII uppercase letters.
    :rtype words (list) - All prefixed words
    """
    if letters is None:
        letters = list(ascii_uppercase)
    re_vowels = re.compile(r'[aeiouy]')
    matches = re.search(re_vowels, word)
    if matches is None:
        # No vowel to anchor on: return the word untouched.
        return [word]
    words = []
    vowels = ['A', 'E', 'I', 'O', 'U']
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            # If beginning letter is a vowel, don't offset the index
            if first_match == 0:
                words.append('{}{}'.format(letter, word))
            else:
                words.append('{}{}'.format(letter, word[first_match:]))
    return words
781
|
|
|
|
782
|
|
|
|
783
|
|
|
def recycle(words, func, times=2):
    """Run a set of words applied to a function repeatedly.

    It will re-run with the last output as the new input.
    `words` must be a list, and `func` must return a list.

    :param words (list): The list of words.
    :param func (function): A function to recycle.
                            This function must take a single argument,
                            a list of strings.
    :param times (int, optional): The number of times to call the function.
    """
    # Iterative equivalent of the original tail recursion.
    for _ in range(times):
        words = func(words)
    return words
798
|
|
|
|
799
|
|
|
|
800
|
|
|
def backronym(acronym, theme, max_attempts=10):
    """Attempt to generate a backronym based on a given acronym and theme.

    Fixes over the original implementation:
      * ``str.index`` raises ValueError, not IndexError -- the old except
        clause could never catch a miss.
      * The ``while`` used ``or``, so the loop could only terminate once
        the backronym was complete, spinning forever when no matching
        words were ever found; ``and`` bounds it by `max_attempts`.
      * Result words are emitted in acronym-letter order (as the sorting
        comment always intended) and as a plain list.

    :param acronym (str): The starting acronym.
    :param theme (str): The seed word to base other words off of.
    :param max_attempts (int, optional): The number of attempts before failing.
    :rtype dict: The result dictionary. If a backronym was successfully
        generated, the `success` key will be True, otherwise False.
    """
    ret = {
        'acronym': '.'.join(list(acronym)).upper(),
        'backronym': '',
        'words': [],
        'success_ratio': 0.0,
        'success': False
    }
    if not acronym or not theme:
        return ret
    all_words = set()
    words = nlp._get_synset_words(theme)
    _backronym = []
    sdict = {}
    acronym = acronym.lower()
    cur_step = 0
    # Add words if they contain the same first letter
    # as any in the given acronym.
    while len(_backronym) < len(acronym) and cur_step < max_attempts:
        all_words.update(words)
        for word in words:
            if word[0].lower() in acronym:
                if '_' in word:
                    # Don't add multi-word strings, but don't leave it blank.
                    _backronym.append(word[0])
                else:
                    _backronym.append(word)
        # Sort the words in order of the acronym's
        # letters by re-arranging indices.
        for word in _backronym:
            try:
                index = acronym.index(word[0].lower())
                sdict[index] = word
            except ValueError:
                continue
        cur_step += 1
        # Refresh words for next attempt.
        words = nlp._get_synset_words(theme)
        # Try again if no words existed.
        if not words:
            continue
        # Get new theme, similar to originating theme.
        theme = words[0]
    # Emit in acronym-letter order.
    vals = [sdict[key] for key in sorted(sdict)]
    ret.update({
        'backronym': ' '.join(vals).upper(),
        'words': vals,
        'success_ratio': float(len(vals)) / float(len(acronym)),
        'success': len(vals) == len(acronym)
    })
    return ret
859
|
|
|
|
860
|
|
|
|
861
|
|
|
def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words"
    """
    for technique, entries in data['words'].items():
        cleaned = normalization.clean_sort(entries)
        cleaned = normalization.remove_odd_sounding_words(cleaned)
        data['words'][technique] = normalization.uniquify(cleaned)
    return data
872
|
|
|
|
873
|
|
|
|
874
|
|
|
def generate_all_techniques(words):
    """Generate all techniques across the library in one place.

    :param words (list): The source words to run every technique over.
    :rtype dict: {'words': {technique_name: generated_list}}, cleaned
        via `super_scrub`.
    """
    data = {
        'words': {
            'alliterations': make_name_alliteration(words),
            'portmanteau': make_portmanteau_default_vowel(words),
            'vowels': make_vowelify(words),
            'suffix': suffixify(words),
            'prefix': prefixify(words),
            'duplifix': duplifixify(words),
            'disfix': disfixify(words),
            'infix': infixify(words),
            'simulfix': simulfixify(words),
            # NOTE(review): founder names are hard-coded sample values here.
            'founder_product_name': make_founder_product_name(
                'Lindsey', 'Chris', 'Widgets'),
            'punctuator': make_punctuator_vowels(words),
            'name_abbreviation': make_name_abbreviation(words),
            'make_portmanteau_split': make_portmanteau_split(words),
            'forkerism': forkerism(words),
            'kniferism': kniferism(words),
            'spoonerism': spoonerism(words),
            'palindrome': palindromes(words),
            'reduplication_ablaut': reduplication_ablaut(words),
            'misspelling': make_misspelling(words),
            'descriptors': make_descriptors(
                get_descriptors(words))
        }
    }
    return super_scrub(data)
903
|
|
|
|
# NOTE(review): the following pasted Pylint troubleshooting text does not
# belong in this module; it is preserved here as a comment so the file
# remains valid Python:
# This can be caused by one of the following:
# 1. Missing Dependencies
#    This error could indicate a configuration issue of Pylint. Make sure
#    that your libraries are available by adding the necessary commands.
# 2. Missing __init__.py files
#    This error could also result from missing __init__.py files in your
#    module folders. Make sure that you place one file in each sub-folder.