1
|
|
|
"""Primary techniques for the core functionality of namebot.""" |
2
|
|
|
|
3
|
|
|
from __future__ import absolute_import |
4
|
|
|
from __future__ import division |
5
|
|
|
|
6
|
|
|
import re |
7
|
|
|
from collections import defaultdict |
8
|
|
|
from random import choice |
9
|
|
|
from string import ascii_uppercase |
10
|
|
|
|
11
|
|
|
import nltk |
|
|
|
|
12
|
|
|
|
13
|
|
|
from . import nlp |
14
|
|
|
from . import normalization |
15
|
|
|
from . import settings as namebot_settings |
16
|
|
|
|
17
|
|
|
|
18
|
|
|
_prefixes = namebot_settings.PREFIXES |
19
|
|
|
_suffixes = namebot_settings.SUFFIXES |
20
|
|
|
_alphabet = namebot_settings.ALPHABET |
21
|
|
|
_consonants = namebot_settings.CONSONANTS |
22
|
|
|
_vowels = namebot_settings.VOWELS |
23
|
|
|
_regexes = namebot_settings.regexes |
24
|
|
|
|
25
|
|
|
|
26
|
|
|
def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    Args:
        words (list): List of words.
        tld (str, optional): The TLD (top-level domain) to use.

    Returns:
        list: The modified list of words.

    >>> domainify(['radio'], tld='io')
    >>> ['rad.io']
    """
    _words = []
    # Accept both '.io' and 'io' by normalizing away a leading dot.
    if tld.startswith('.'):
        tld = tld[1:]
    for word in words:
        if tld and word.endswith(tld):
            # Split only at the *end* of the word. The previous
            # `str.replace` also mangled earlier occurrences of the tld
            # substring (e.g. 'ioio' with tld 'io' became '.io.io').
            word = '{}.{}'.format(word[:-len(tld)], tld)
        _words.append(word)
    return _words
47
|
|
|
|
48
|
|
|
|
49
|
|
|
def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    Swaps the first letters of each adjacent word pair:
    [f]oo [b]ar => boo far.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> spoonerism(['foo', 'bar'])
    >>> ['boo far']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    combined = []
    for first, second in zip(words, words[1:]):
        try:
            combined.append('{}{} {}{}'.format(
                second[0],   # 2nd word, 1st letter
                first[1:],   # 1st word, 2nd letter to end
                first[0],    # 1st word, 1st letter
                second[1:])) # 2nd word, 2nd letter to end
        except IndexError:
            # An empty string in the pair cannot be swapped; skip it.
            continue
    return combined
75
|
|
|
|
76
|
|
|
|
77
|
|
|
def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    Swaps the middle letters of each adjacent word pair:
    f[o]o b[a]r => fao bor.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> kniferism(['foo', 'bar'])
    >>> ['fao bor']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for first, second in zip(words, words[1:]):
        try:
            mid_first = len(first) // 2
            mid_second = len(second) // 2
            results.append('{}{}{} {}{}{}'.format(
                first[:mid_first], second[mid_second], first[mid_first + 1:],
                second[:mid_second], first[mid_first], second[mid_second + 1:]))
        except IndexError:
            # Empty strings have no middle letter to swap; skip the pair.
            continue
    return results
107
|
|
|
|
108
|
|
|
|
109
|
|
|
def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    Swaps the last letters of each adjacent word pair:
    fo[o] ba[r] => for bao.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> forkerism(['foo', 'bar'])
    >>> ['for bao']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for first, second in zip(words, words[1:]):
        try:
            results.append('{}{} {}{}'.format(
                first[:-1],   # 1st word, minus its last letter
                second[-1],   # 2nd word, last letter
                second[:-1],  # 2nd word, minus its last letter
                first[-1]))   # 1st word, last letter
        except IndexError:
            # Empty strings have no last letter; skip the pair.
            continue
    return results
140
|
|
|
|
141
|
|
|
|
142
|
|
|
def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """A technique to combine words and altering the vowels.

    e.g ch[i]t-ch[a]t, d[i]lly, d[a]lly.
    See http://phrases.org.uk/meanings/reduplication.html.

    Args:
        words (list) - The list of words to operate on.
        count (int, optional) - Max vowel substitutions per word.
        random (bool, optional) - Pick the replacement vowel at random;
            otherwise use `vowel`.
        vowel (str, optional) - The replacement vowel when not random.

    Returns:
        list - "word variant" pairs where a substitution occurred.
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    substitution = choice(_vowels) if random else vowel
    pairs = []
    for word in words:
        variant = re.sub(r'a|e|i|o|u', substitution, word, count=count)
        # A pair is only interesting when the substitution changed the word.
        if variant != word:
            pairs.append('{} {}'.format(word, variant))
    return pairs
158
|
|
|
|
159
|
|
|
|
160
|
|
|
def prefixify(words):
    """Apply a prefix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for prefix in _prefixes:
            word_starts_consonant = re.search(
                _regexes['no_vowels'], word[0])
            prefix_starts_consonant = re.search(
                _regexes['no_vowels'], prefix[0])
            if not (word_starts_consonant or prefix_starts_consonant):
                continue
            # Join only if a vowel ends the prefix or begins the word
            # (or vice versa) so the fused result stays pronounceable.
            prefix_ends_vowel = re.search(r'a|e|i|o|u', prefix[-1:])
            word_starts_vowel = re.search(r'^a|e|i|o|u', word[:1])
            if prefix_ends_vowel or word_starts_vowel:
                new_arr.append('{}{}'.format(prefix, word))
    return new_arr
187
|
|
|
|
188
|
|
|
|
189
|
|
|
def suffixify(words):
    """Apply a suffix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.
            (e.g -> chard + ard = chardard -> chard)

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            word_start_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_start_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if word_start_vowel or suffix_start_vowel:
                # `==`/`!=` replace the previous `is`/`is not` string
                # comparisons, which only worked by accident of CPython
                # string interning.
                if suffix == 'ify':
                    if word[-1] == 'e':
                        # Trim a trailing 'e' (or 'ie') before adding
                        # 'ify' so we avoid awkward 'eify' endings.
                        if word[-2] != 'i':
                            new_arr.append('{}{}'.format(word[:-2], suffix))
                        else:
                            new_arr.append('{}{}'.format(word[:-1], suffix))
                    new_arr.append(word + suffix)
                else:
                    new_arr.append(word + suffix)
    return new_arr
217
|
|
|
|
218
|
|
|
|
219
|
|
|
def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # Check if the first letter is NOT the same as the second letter,
            # or the combined word is not a duplicate of the first.
            duplicate_word = '{}{}'.format(letter, word[1:]) == word
            # `!=` replaces the previous `is not` identity check, which
            # only worked because CPython interns 1-character strings.
            if word[0] != letter and not duplicate_word:
                new_arr.append('{} {}{}'.format(word, letter, word[1:]))
    return new_arr
239
|
|
|
|
240
|
|
|
|
241
|
|
|
def disfixify(words, replaces=1):
    """Apply a disfix technique to a set of words.

    Disfixing is done by removing the first set of vowel-consonant pairs.

    Args:
        words (list) - The list of words to operate on.
        replaces (int, optional): Number of replacements
            to make on this string.

    Returns:
        new_arr (list): the updated *fixed words
    """
    # Matches letter + vowel + consonant, e.g. the 'ran' in 'random'.
    vc_combo = re.compile(r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}')
    trimmed = []
    for word in words:
        # Only strip when more than one combo exists, so the word keeps
        # at least one recognizable chunk.
        if len(vc_combo.findall(word)) > 1:
            trimmed.append(vc_combo.sub('', word, replaces))
        else:
            trimmed.append(word)
    return trimmed
262
|
|
|
|
263
|
|
|
|
264
|
|
|
def infixify(words):
    """Apply a infix technique to a set of words.

    Adds all consonant+vowel pairs to all inner matching vowel+consonant pairs
    of a word, giving all combinations for each word. Words with no
    matching inner pair are passed through unchanged.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    # Matches letter + vowel + consonant + vowel + consonant, i.e. a
    # 5-character "inner" run that can be split and fused around.
    vc_combo_pair = re.compile(
        r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}[aeiou]'
        '{1}[qwrtypsdfghjklzxcvbnm]{1}')
    for word in words:
        matches = re.findall(vc_combo_pair, word)
        if matches:
            for match in matches:
                # CV_TL_PAIRS comes from settings; presumably a list of
                # consonant+vowel two-letter pairs -- TODO confirm.
                for infix_pair in namebot_settings.CV_TL_PAIRS:
                    # Get midpoint of this string.
                    mid = len(match) // 2
                    # Get the left and right substrings to join with.
                    first, second = match[0:mid], match[mid:]
                    # Check if the infix_pair is the same as start, or end.
                    bad_matches = [
                        # Duplicates joined is bad.
                        infix_pair == first, infix_pair == second,
                        # Matching letters on start/end joining substrings
                        # is bad.
                        first[-1] == infix_pair[0],
                        # Matching letters on end/start joining substrings
                        # is also bad.
                        first[0] == infix_pair[-1],
                    ]
                    # Skip bad 'fusings'
                    if any(bad_matches):
                        continue
                    # Splice the infix pair into the middle of the match,
                    # then substitute the fused run back into the word.
                    replacer = '{}{}{}'.format(first, infix_pair, second)
                    new_arr.append(word.replace(match, replacer))
        else:
            new_arr.append(word)
    return new_arr
308
|
|
|
|
309
|
|
|
|
310
|
|
|
def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
                                 If not specified, these will be generated
                                 randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
                             (if pairs is not specified.)

    Returns:
        results (list) - The simulfix version of each word,
                         for each simulfix pair.
    """
    # NOTE: `max` shadows the builtin, but is kept for backwards
    # compatibility with keyword callers.
    if pairs is None:
        pairs = ['{}{}'.format(choice(_vowels), choice(_consonants))
                 for _ in range(max)]
    results = []
    for word in words:
        midpoint = len(word) // 2
        for combo in pairs:
            # Inject the pair at the midpoint of the word.
            results.append(
                '{}{}{}'.format(word[:midpoint], combo, word[midpoint:]))
    return results
335
|
|
|
|
336
|
|
|
|
337
|
|
|
def palindrome(word):
    """Create a palindrome from a word.

    Args:
        word (str): The word.

    Returns:
        str: The updated palindrome.

    >>> palindrome('cool')
    >>> 'coollooc'
    """
    # Append the reversed word to itself.
    return word + word[::-1]
350
|
|
|
|
351
|
|
|
|
352
|
|
|
def palindromes(words):
    """Convert a list of words into their palindromic form.

    Args:
        words (list): The words.

    Returns:
        list: The list of palindromes.

    >>> palindromes(['cool', 'neat'])
    >>> ['coollooc', 'neattaen']
    """
    # Mirror each word onto itself (same rule as `palindrome`).
    return ['{}{}'.format(word, word[::-1]) for word in words]
365
|
|
|
|
366
|
|
|
|
367
|
|
|
def make_founder_product_name(founder1, founder2, product):
    """Get the name of two people forming a company and combine it.

    Only the first initial of each founder (uppercased) is used.

    Args:
        founder1 (str): Your founder name 1.
        founder2 (str): Your founder name 2.
        product (str): Your product/feature/service name.

    Returns:
        str: The updated name.

    >>> make_founder_product_name('chris', 'ella', 'widgets')
    >>> 'C & E widgets'
    """
    # The previous doctest claimed 'chris & ella widgets', but the code
    # has always used the uppercased initials only.
    return '{} & {} {}'.format(
        founder1[0].upper(),
        founder2[0].upper(),
        product)
385
|
|
|
|
386
|
|
|
|
387
|
|
|
def make_name_alliteration(words, divider=' '):
    """Make an alliteration with a set of words, if applicable.

    Examples:
    java jacket
    singing sally
    earth engines
    ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array

    Args:
        words (list): The words to pair up.
        divider (str, optional): The string joining each pair.

    Returns:
        new_arr (list): All alliterative pairings.
    """
    new_arr = []
    words = sorted(words)

    for word1 in words:
        for word2 in words:
            # `==`/`!=` replace the previous `is`/`is not` checks, which
            # compared object identity (dependent on CPython string
            # interning) rather than values.
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
408
|
|
|
|
409
|
|
|
|
410
|
|
|
def make_name_abbreviation(words):
    """Will make some kind of company acronym.

    eg: BASF, AT&T, A&W
    Returns a single string of the new word combined.
    """
    initials = (word[:1].upper() for word in words)
    return ''.join(initials)
417
|
|
|
|
418
|
|
|
|
419
|
|
|
def make_vowel(words, vowel_type, vowel_index):
    """Primary for all Portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as following:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.

    Args:
        words (list): The words to combine pairwise.
        vowel_type: A compiled regex (or pattern) matching one vowel.
        vowel_index (str): The vowel character itself.

    Returns:
        new_arr (list): The fused portmanteau strings.
    """
    new_arr = []
    for i in words:
        for j in words:
            is_match_i = re.search(vowel_type, i)
            is_match_j = re.search(vowel_type, j)
            # `!=` replaces the previous `is not` identity check, so
            # equal words are skipped by value rather than by object id.
            if i != j and is_match_i and is_match_j:
                # get the indices and lengths to use in finding the ratio
                pos_i = i.index(vowel_index)
                len_i = len(i)
                pos_j = j.index(vowel_index)
                len_j = len(j)

                # If starting index is 0,
                # add 1 to it so we're not dividing by zero
                # (`==` replaces the previous `is 0`, which relied on
                # CPython small-int caching).
                if pos_i == 0:
                    pos_i = 1
                if pos_j == 0:
                    pos_j = 1

                # Decide which word should be the
                # prefix and which should be suffix
                if round(pos_i / len_i) > round(pos_j / len_j):
                    p = i[0: pos_i + 1]
                    p2 = j[pos_j: len(j)]
                    if len(p) + len(p2) > 2:
                        if re.search(
                                _regexes['all_vowels'], p) or re.search(
                                    _regexes['all_vowels'], p2):
                            # Avoid doubling the shared vowel at the seam
                            # (`==` replaces the interning-dependent `is`).
                            if p[-1] == p2[0]:
                                new_arr.append(p[:-1] + p2)
                            else:
                                new_arr.append(p + p2)
    return new_arr
468
|
|
|
|
469
|
|
|
|
470
|
|
|
def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    combos = []
    # Run make_vowel once per vowel, accumulating every combination.
    for vowel in 'aeiou':
        pattern = re.compile(r'{}{{1}}'.format(vowel))
        combos += make_vowel(words, pattern, vowel)
    return combos
494
|
|
|
|
495
|
|
|
|
496
|
|
|
def make_portmanteau_split(words):
    """Make a portmeanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then the last V+C in the second word,
    then all C in the first word for the middle.
    """
    new_arr = []
    # NOTE: the previous patterns used `[a|e|i|o|u{1}]`, a character
    # class that also matched the literal characters '|', '{', '1' and
    # '}'; `[aeiou]` expresses the intended vowel set.
    lead_cv = re.compile(r'[^aeiou]+[aeiou]')
    tail_vc = re.compile(r'[aeiou]+[^aeiou]$')
    vowel_split = re.compile(r'[aeiou]')
    for i in words:
        for j in words:
            # Value comparison replaces the previous `is not` identity
            # check, so equal duplicate words are skipped reliably.
            if i == j:
                continue
            l1 = lead_cv.search(i)
            l2 = tail_vc.search(j)
            if i and l1 and l2:
                # Third letter used for
                # consonant middle splits only
                l3 = vowel_split.split(i)
                l1 = l1.group(0)
                l2 = l2.group(0)
                if l3 and len(l3) > 0:
                    for v in l3:
                        new_arr.append(l1 + v + l2)
                else:
                    new_arr.append('{}{}{}'.format(l1, 't', l2))
                    new_arr.append('{}{}{}'.format(l1, 's', l2))
                    new_arr.append('{}{}{}'.format(l1, 'z', l2))
                    new_arr.append('{}{}{}'.format(l1, 'x', l2))
    return new_arr
527
|
|
|
|
528
|
|
|
|
529
|
|
|
def make_punctuator(words, replace):
    """Put some hyphens or dots, or a given punctutation.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _punctuate(mark):
        # Insert `mark` after every occurrence of `replace`.
        return [word.replace(replace, replace + mark) for word in words]

    return _punctuate('-') + _punctuate('.')
541
|
|
|
|
542
|
|
|
|
543
|
|
|
def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    results = []
    # Punctuate around each vowel in turn, in a/e/i/o/u order.
    for vowel in 'aeiou':
        results += make_punctuator(words, vowel)
    return results
552
|
|
|
|
553
|
|
|
|
554
|
|
|
def make_vowelify(words):
    """Chop off consonant ala nautica if second to last letter is a vowel."""
    # Keep only words with a vowel somewhere before the last two
    # characters, dropping their final letter.
    return [word[:-1] for word in words
            if re.search(_regexes['all_vowels'], word[:-2])]
561
|
|
|
|
562
|
|
|
|
563
|
|
|
def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results."""
    # (old, new) substitution pairs applied independently to each word.
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # these seem to have
        # sucked in practice
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at end of word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    # Apply every substitution pair to every word, then de-duplicate.
    misspelled = [word.replace(*tokens)
                  for word in words
                  for tokens in token_groups]
    return normalization.uniquify(misspelled)
634
|
|
|
|
635
|
|
|
|
636
|
|
|
def _pig_latinize(word, postfix='ay'): |
637
|
|
|
"""Generate standard pig latin style, with optional postfix argument.""" |
638
|
|
|
# Common postfixes: ['ay', 'yay', 'way'] |
639
|
|
|
if not type(postfix) is str: |
640
|
|
|
raise TypeError('Must use a string for postfix.') |
641
|
|
|
|
642
|
|
|
piggified = None |
643
|
|
|
|
644
|
|
|
vowel_re = re.compile(r'(a|e|i|o|u)') |
645
|
|
|
first_letter = word[0:1] |
646
|
|
|
|
647
|
|
|
# clean up non letters |
648
|
|
|
word = word.replace(r'[^a-zA-Z]', '') |
649
|
|
|
|
650
|
|
|
if vowel_re.match(first_letter): |
651
|
|
|
piggified = word + 'way' |
652
|
|
|
else: |
653
|
|
|
piggified = ''.join([word[1: len(word)], first_letter, postfix]) |
654
|
|
|
return piggified |
655
|
|
|
|
656
|
|
|
|
657
|
|
|
def pig_latinize(words, postfix='ay'):
    """Pig latinize a set of words.

    Args:
        words (list): A list of words.
        postfix (str, optional): A postfix to use. Default is `ay`.

    Returns:
        words (list): The updated list.
    """
    latinized = []
    for word in words:
        latinized.append(_pig_latinize(word, postfix=postfix))
    return latinized
669
|
|
|
|
670
|
|
|
|
671
|
|
|
def acronym_lastname(description, lastname):
    """Create an acronym plus the last name.

    Inspiration: ALFA Romeo.
    """
    # Build the acronym from the significant (non-stop) words only.
    significant = normalization.remove_stop_words(description.split(' '))
    acronym = ''.join(word[0].upper() for word in significant)
    return '{} {}'.format(acronym, lastname)
679
|
|
|
|
680
|
|
|
|
681
|
|
|
def get_descriptors(words):
    """Group words by their NLTK part-of-speech descriptors.

    Use NLTK to first grab tokens by looping through words,
    then tag part-of-speech (in isolation)
    and provide a dictionary with a list of each type
    for later retrieval and usage.
    """
    grouped = defaultdict(list)
    tagged = nltk.pos_tag(nltk.word_tokenize(' '.join(words)))
    # Bucket each token under its part-of-speech tag.
    for token, pos in tagged:
        grouped[pos].append(token)
    return grouped
696
|
|
|
|
697
|
|
|
|
698
|
|
|
def _add_pos_subtypes(nouns, verbs): |
699
|
|
|
"""Combine alternating verbs and nouns into a new list. |
700
|
|
|
|
701
|
|
|
Args: |
702
|
|
|
nouns (list) - List of nouns, noun phrases, etc... |
703
|
|
|
verbs (list) - List of verbs, verb phrases, etc... |
704
|
|
|
|
705
|
|
|
Returns: |
706
|
|
|
words (list) - The newly combined list |
707
|
|
|
""" |
708
|
|
|
words = [] |
709
|
|
|
try: |
710
|
|
|
for noun in nouns: |
711
|
|
|
for verb in verbs: |
712
|
|
|
words.append('{} {}'.format(noun, verb)) |
713
|
|
|
words.append('{} {}'.format(verb, noun)) |
714
|
|
|
except KeyError: |
|
|
|
|
715
|
|
|
pass |
716
|
|
|
return words |
717
|
|
|
|
718
|
|
|
|
719
|
|
|
def _create_pos_subtypes(words):
    """Check part-of-speech tags for a noun-phrase, adding combinations if so.

    If it exists, add combinations with noun-phrase + verb-phrase,
    noun-phrase + verb, and noun-phrase + adverb,
    for each pos type that exists.

    Args:
        words (dict) - Mapping of pos tag -> list of words.

    Returns:
        new_words (list) - The newly combined list
    """
    new_words = []
    pos_tags = words.keys()
    if 'NNP' not in pos_tags:
        return new_words
    # Pair proper nouns with each verb/adverb group present,
    # in VBP, VB, RB order.
    for tag in ('VBP', 'VB', 'RB'):
        if tag in pos_tags:
            new_words += _add_pos_subtypes(words['NNP'], words[tag])
    return new_words
742
|
|
|
|
743
|
|
|
|
744
|
|
|
def make_descriptors(words):
    """Make descriptor names.

    Based from a verb + noun, adjective + noun combination.
    Examples:
        -Pop Cap,
        -Big Fish,
        -Red Fin,
        -Cold Water (grill), etc...
    Combines VBP/VB/RB, with NN/NNS
    """
    # De-duplicate the combinations before returning.
    unique = set(_create_pos_subtypes(words))
    return list(unique)
756
|
|
|
|
757
|
|
|
|
758
|
|
|
def all_prefix_first_vowel(word, letters=None):
    """Find the first vowel in a word and prefixes with consonants.

    Args:
        word (str) - the word to update
        letters (list, optional) - the letters to use for prefixing;
            defaults to all uppercase ASCII letters.

    Returns:
        words (list) - All prefixed words

    """
    # Default assigned here rather than as a mutable default argument
    # evaluated once at definition time.
    if letters is None:
        letters = list(ascii_uppercase)
    re_vowels = re.compile(r'[aeiouy]')
    matches = re.search(re_vowels, word)
    if matches is None:
        return [word]
    words = []
    vowels = ['A', 'E', 'I', 'O', 'U']
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            # Replace everything before the first vowel with the new
            # consonant prefix. When the word already starts on its first
            # vowel (index 0), word[first_match:] is the whole word, so
            # the old duplicated `if first_match == 0` branch folds away.
            words.append('{}{}'.format(letter, word[first_match:]))
    return words
784
|
|
|
|
785
|
|
|
|
786
|
|
|
def recycle(words, func, times=2):
    """Run a set of words applied to a function repeatedly.

    It will re-run with the last output as the new input.
    `words` must be a list, and `func` must return a list.
    """
    # Iterative form of the original recursion; zero or negative
    # `times` returns the input unchanged.
    for _ in range(times):
        words = func(words)
    return words
795
|
|
|
|
796
|
|
|
|
797
|
|
|
def backronym(acronym, theme, max_attempts=10):
    """Attempt to generate a backronym based on a given acronym and theme.

    Args:
        acronym (str): The starting acronym.
        theme (str): The seed word to base other words off of.
        max_attempts (int, optional): The number of attempts before failing.

    Returns:
        dict: The result dictionary. If a backronym was successfully generated,
            the `success` key will be True, otherwise False.
    """
    ret = {
        'acronym': '.'.join(list(acronym)).upper(),
        'backronym': '',
        'words': [],
        'success_ratio': 0.0,
        'success': False
    }
    if not acronym or not theme:
        return ret
    all_words = set()
    words = nlp._get_synset_words(theme)
    _backronym = []
    acronym = acronym.lower()
    # Add words if they contain the same first letter
    # as any in the given acronym.
    cur_step = 0
    # NOTE(review): `or` keeps looping for all `max_attempts` even after
    # enough words are collected -- confirm whether `and` was intended.
    while len(_backronym) < len(acronym) or cur_step < max_attempts:
        all_words.update(words)
        for word in words:
            if word[0].lower() in acronym:
                if '_' in word:
                    # Don't add multi-word strings, but don't leave it blank.
                    _backronym.append(word[0])
                else:
                    _backronym.append(word)
        sdict = {}
        # Sort the word in order of the acronyms
        # letters by re-arranging indices.
        for word in _backronym:
            try:
                index = acronym.index(word[0].lower())
                sdict[index] = word
            except ValueError:
                # str.index raises ValueError (not IndexError, as the
                # old handler assumed) when the letter is absent.
                continue
        cur_step += 1
        # Refresh words for next attempt.
        words = nlp._get_synset_words(theme)
        # Try again if no words existed.
        if not words:
            continue
        # Get new theme, similar to originating theme.
        theme = words[0]
    vals = sdict.values()
    ret.update({
        'backronym': ' '.join(vals).upper(),
        'words': vals,
        'success_ratio': float(len(vals)) / float(len(acronym)),
        'success': len(vals) == len(acronym)
    })
    return ret
859
|
|
|
|
860
|
|
|
|
861
|
|
|
def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words"
    """
    # Clean, filter, then de-duplicate each technique's word list.
    for technique, words in data['words'].items():
        cleaned = normalization.clean_sort(words)
        cleaned = normalization.remove_odd_sounding_words(cleaned)
        data['words'][technique] = normalization.uniquify(cleaned)
    return data
872
|
|
|
|
873
|
|
|
|
874
|
|
|
def generate_all_techniques(words):
    """Generate all techniques across the library in one place.

    Args:
        words (list): The seed words.

    Returns:
        dict: A scrubbed {'words': {technique: results}} mapping.
    """
    data = {
        'words': {
            # NOTE: a duplicate 'alliterations' key was removed; in a
            # dict literal the second entry silently overwrote the first.
            'alliterations': make_name_alliteration(words),
            'portmanteau': make_portmanteau_default_vowel(words),
            'vowels': make_vowelify(words),
            'suffix': suffixify(words),
            'prefix': prefixify(words),
            'duplifix': duplifixify(words),
            'disfix': disfixify(words),
            'infix': infixify(words),
            'simulfix': simulfixify(words),
            'founder_product_name': make_founder_product_name(
                'Lindsey', 'Chris', 'Widgets'),
            'punctuator': make_punctuator_vowels(words),
            'name_abbreviation': make_name_abbreviation(words),
            'make_portmanteau_split': make_portmanteau_split(words),
            'forkerism': forkerism(words),
            'kniferism': kniferism(words),
            'spoonerism': spoonerism(words),
            'palindrome': palindromes(words),
            'reduplication_ablaut': reduplication_ablaut(words),
            'misspelling': make_misspelling(words),
            'descriptors': make_descriptors(
                get_descriptors(words))
        }
    }
    return super_scrub(data)
904
|
|
|
|
This can be caused by one of the following:
1. Missing dependencies
This error could indicate a configuration issue with Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing __init__.py files in your module folders. Make sure that you place one __init__.py file in each sub-folder.