"""Primary techniques for the core functionality of namebot."""

from __future__ import absolute_import
from __future__ import division

import re
from collections import defaultdict
from random import choice
from string import ascii_uppercase

import nltk

from . import nlp
from . import normalization
from . import settings as namebot_settings


_prefixes = namebot_settings.PREFIXES
_suffixes = namebot_settings.SUFFIXES
_alphabet = namebot_settings.ALPHABET
_consonants = namebot_settings.CONSONANTS
_vowels = namebot_settings.VOWELS
_regexes = namebot_settings.regexes


def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    Args:
        words (list): List of words.
        tld (str, optional): The TLD (top-level domain) to use.

    Returns:
        list: The modified list of words.

    """
    _words = []
    if tld.startswith('.'):
        tld = tld.replace('.', '')
    for word in words:
        if word.endswith(tld) and tld != '':
            # Split off only the *trailing* occurrence of the tld,
            # so the same substring earlier in the word is left intact.
            word = '{}.{}'.format(word[:-len(tld)], tld)
        _words.append(word)
    return _words
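
# Example usage (illustrative; assumes the default 'com' TLD):
#
#     >>> domainify(['intercom', 'bigbox'])
#     ['inter.com', 'bigbox']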


def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> spoonerism(['foo', 'bar'])
    ['boo far']
    """
    # First: [f]oo [b]ar => boo far
    new_words = []
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    for k, word in enumerate(words):
        try:
            new_words.append('{}{} {}{}'.format(
                words[k + 1][0],  # 2nd word, 1st letter
                word[1:],  # 1st word, 2nd letter to end
                word[0],  # 1st word, 1st letter
                words[k + 1][1:]))  # 2nd word, 2nd letter to end
        except IndexError:
            continue
    return new_words


def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> kniferism(['foo', 'bar'])
    ['fao bor']
    """
    # Mid: f[o]o b[a]r => fao bor
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    new_words = []
    for k, word in enumerate(words):
        try:
            middle_second = int(len(words[k + 1]) / 2)
            middle_first = int(len(word) / 2)
            new_words.append('{}{}{} {}{}{}'.format(
                word[:middle_first],
                words[k + 1][middle_second],
                word[middle_first + 1:],
                words[k + 1][:middle_second],
                word[middle_first],
                words[k + 1][middle_second + 1:]))
        except IndexError:
            continue
    return new_words


def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> forkerism(['foo', 'bar'])
    ['for bao']
    """
    # Last: fo[o] ba[r] => for bao
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    new_words = []
    for k, word in enumerate(words):
        try:
            s_word = words[k + 1]
            s_word_len = len(s_word)
            f_word_len = len(word)
            f_w_last_letter = word[f_word_len - 1]
            s_w_last_letter = words[k + 1][s_word_len - 1]
            new_words.append('{}{} {}{}'.format(
                word[:f_word_len - 1],  # 1st word, 1st letter to last - 1
                s_w_last_letter,  # 2nd word, last letter
                s_word[:s_word_len - 1],  # 2nd word, 1st letter to last - 1
                f_w_last_letter))  # 1st word, last letter
        except IndexError:
            continue
    return new_words


def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """Combine words by swapping in alternate vowels (ablaut reduplication).

    e.g. ch[i]t-ch[a]t, d[i]lly-d[a]lly.
    See http://phrases.org.uk/meanings/reduplication.html.
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    new_words = []
    substitution = choice(_vowels) if random else vowel
    for word in words:
        second = re.sub(r'a|e|i|o|u', substitution, word, count=count)
        # Only append if the first and second are different.
        if word != second:
            new_words.append('{} {}'.format(word, second))
    return new_words
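
# Example usage (illustrative; pass random=False for deterministic output):
#
#     >>> reduplication_ablaut(['cat', 'dog'], random=False, vowel='i')
#     ['cat cit', 'dog dig']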


def prefixify(words):
    """Apply a prefix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words

    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for prefix in _prefixes:
            word_start_no_vowel = re.search(
                _regexes['no_vowels'], word[0])
            prefix_start_no_vowel = re.search(
                _regexes['no_vowels'], prefix[0])
            if word_start_no_vowel or prefix_start_no_vowel:
                # Only join when there's a vowel at the end of the
                # prefix, or at the beginning of the word, so the
                # combined result stays pronounceable.
                prefix_end_vowel = re.search(r'[aeiou]', prefix[-1:])
                word_start_vowel = re.search(r'[aeiou]', word[:1])
                if prefix_end_vowel or word_start_vowel:
                    new_arr.append('{}{}'.format(prefix, word))
    return new_arr


def suffixify(words):
    """Apply a suffix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.
            (e.g. chard + ard = chardard -> chard)

    Returns:
        new_arr (list): the updated *fixed words

    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            word_start_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_start_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if word_start_vowel or suffix_start_vowel:
                if suffix == 'ify':
                    if word[-1] == 'e':
                        if word[-2] != 'i':
                            new_arr.append('{}{}'.format(word[:-2], suffix))
                        else:
                            new_arr.append('{}{}'.format(word[:-1], suffix))
                    new_arr.append(word + suffix)
                else:
                    new_arr.append(word + suffix)
    return new_arr


def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words

    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # Check that the first letter is NOT the same as the second
            # letter, and that the combined word is not a duplicate of
            # the first.
            duplicate_word = '{}{}'.format(letter, word[1:]) == word
            if word[0] != letter and not duplicate_word:
                new_arr.append('{} {}{}'.format(word, letter, word[1:]))
    return new_arr
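
# Example usage (illustrative; assumes settings.ALPHABET covers 'a'-'z'):
#
#     >>> 'teeny weeny' in duplifixify(['teeny'])
#     True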


def disfixify(words, replaces=1):
    """Apply a disfix technique to a set of words.

    Disfixing is done by removing the first letter + vowel + consonant
    combination found in a word.

    Args:
        words (list) - The list of words to operate on.
        replaces (int, optional): Number of replacements
            to make on this string.

    Returns:
        new_arr (list): the updated *fixed words

    """
    new_arr = []
    vc_combo = r'[a-zA-Z][aeiou][qwrtypsdfghjklzxcvbnm]'
    for word in words:
        if len(re.findall(vc_combo, word)) > 1:
            new_arr.append(re.sub(vc_combo, '', word, count=replaces))
        else:
            new_arr.append(word)
    return new_arr
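
# Example usage (illustrative):
#
#     >>> disfixify(['heliport'])
#     ['iport']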


def infixify(words):
    """Apply an infix technique to a set of words.

    Adds all consonant+vowel pairs to all inner matching vowel+consonant pairs
    of a word, giving all combinations for each word.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words

    """
    new_arr = []
    vc_combo_pair = re.compile(
        r'[a-zA-Z][aeiou][qwrtypsdfghjklzxcvbnm]'
        r'[aeiou][qwrtypsdfghjklzxcvbnm]')
    for word in words:
        matches = re.findall(vc_combo_pair, word)
        if matches:
            for match in matches:
                for infix_pair in namebot_settings.CV_TL_PAIRS:
                    # Get midpoint of this string.
                    mid = len(match) // 2
                    # Get the left and right substrings to join with.
                    first, second = match[0:mid], match[mid:]
                    # Check if the infix_pair is the same as start, or end.
                    bad_matches = [
                        # Duplicates joined is bad.
                        infix_pair == first, infix_pair == second,
                        # Matching letters on start/end joining substrings
                        # is bad.
                        first[-1] == infix_pair[0],
                        # Matching letters on end/start joining substrings
                        # is also bad.
                        first[0] == infix_pair[-1],
                    ]
                    # Skip bad 'fusings'
                    if any(bad_matches):
                        continue
                    replacer = '{}{}{}'.format(first, infix_pair, second)
                    new_arr.append(word.replace(match, replacer))
        else:
            new_arr.append(word)
    return new_arr


def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
            If not specified, these will be generated
            randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
            (if pairs is not specified.)

    Returns:
        results (list) - The simulfix version of each word,
            for each simulfix pair.
    """
    results = []
    if pairs is None:
        pairs = ['{}{}'.format(choice(_vowels), choice(_consonants))
                 for _ in range(max)]
    for word in words:
        for combo in pairs:
            # Insert the simulfix pair at the midpoint of the word.
            mid = len(word) // 2
            _word = '{}{}{}'.format(word[0:mid], combo, word[mid:])
            results.append(_word)
    return results
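
# Example usage (illustrative; explicit pairs make the output deterministic):
#
#     >>> simulfixify(['namebot'], pairs=['ol'])
#     ['namolebot']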


def palindrome(word):
    """Create a palindrome from a word.

    Args:
        word (str): The word.

    Returns:
        str: The updated palindrome.

    """
    return '{}{}'.format(word, word[::-1])


def palindromes(words):
    """Convert a list of words into their palindromic form.

    Args:
        words (list): The words.

    Returns:
        list: The list of palindromes.

    """
    return [palindrome(word) for word in words]
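
# Example usage (illustrative):
#
#     >>> palindromes(['neo', 'bot'])
#     ['neooen', 'bottob']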


def make_founder_product_name(founder1, founder2, product):
    """Combine two founders' initials with a product name, company-style."""
    return '{} & {} {}'.format(
        founder1[0].upper(),
        founder2[0].upper(),
        product)
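
# Example usage (illustrative):
#
#     >>> make_founder_product_name('Lindsey', 'Chris', 'Widgets')
#     'L & C Widgets'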


def make_name_alliteration(word_array, divider=' '):
    """Make an alliteration with a set of words, if applicable.

    Examples:
        java jacket
        singing sally
        earth engines
        ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array
    """
    new_arr = []
    word_array = sorted(word_array)

    for word1 in word_array:
        for word2 in word_array:
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
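
# Example usage (illustrative):
#
#     >>> make_name_alliteration(['java', 'sled', 'jacket'])
#     ['jacket java', 'java jacket']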


def make_name_abbreviation(words):
    """Make a company-style acronym, e.g. BASF, AT&T, A&W.

    Returns a single string of the new word combined.
    """
    return ''.join([word[:1].upper() for word in words])
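
# Example usage (illustrative):
#
#     >>> make_name_abbreviation(['business', 'analytics', 'software', 'firm'])
#     'BASF'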


def make_vowel(words, vowel_type, vowel_index):
    """Primary helper for all portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as follows:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.
    """
    new_arr = []
    for i in words:
        for j in words:
            is_match_i = re.search(vowel_type, i)
            is_match_j = re.search(vowel_type, j)
            if i != j and is_match_i and is_match_j:
                # Get the indices and lengths to use in finding the ratio.
                pos_i = i.index(vowel_index)
                len_i = len(i)
                pos_j = j.index(vowel_index)
                len_j = len(j)

                # If the starting index is 0,
                # add 1 to it so we're not dividing by zero.
                if pos_i == 0:
                    pos_i = 1
                if pos_j == 0:
                    pos_j = 1

                # Decide which word should be the
                # prefix and which should be suffix.
                if round(pos_i / len_i) > round(pos_j / len_j):
                    p = i[0: pos_i + 1]
                    p2 = j[pos_j: len(j)]
                    if len(p) + len(p2) > 2:
                        if re.search(
                            _regexes['all_vowels'], p) or re.search(
                                _regexes['all_vowels'], p2):
                            if p[-1] == p2[0]:
                                new_arr.append(p[:-1] + p2)
                            else:
                                new_arr.append(p + p2)
    return new_arr


def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    new_arr = []
    vowel_a_re = re.compile(r'a')
    vowel_e_re = re.compile(r'e')
    vowel_i_re = re.compile(r'i')
    vowel_o_re = re.compile(r'o')
    vowel_u_re = re.compile(r'u')

    new_arr += make_vowel(words, vowel_a_re, 'a')
    new_arr += make_vowel(words, vowel_e_re, 'e')
    new_arr += make_vowel(words, vowel_i_re, 'i')
    new_arr += make_vowel(words, vowel_o_re, 'o')
    new_arr += make_vowel(words, vowel_u_re, 'u')
    return new_arr
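
# Example usage (illustrative; the result depends on the rounding semantics
# of the running interpreter: Python 2's round() gives the classic output
# below, while Python 3's banker's rounding may return fewer matches):
#
#     >>> make_portmanteau_default_vowel(['brad', 'angelina'])
#     ['brangelina']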


def make_portmanteau_split(words):
    """Make a portmanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then last V+C in the first word,
    then all C in the second word.
    """
    new_arr = []
    for i in words:
        for j in words:
            if i != j:
                l1 = re.search(r'[^aeiou]+[aeiou]', i)
                l2 = re.search(r'[aeiou]+[^aeiou]$', j)
                if i and l1 and l2:
                    # Third letter used for
                    # consonant middle splits only.
                    l3 = re.split(r'[aeiou]', i)
                    l1 = l1.group(0)
                    l2 = l2.group(0)
                    if l3 and len(l3) > 0:
                        for v in l3:
                            new_arr.append(l1 + v + l2)
                    else:
                        new_arr.append('{}{}{}'.format(l1, 't', l2))
                        new_arr.append('{}{}{}'.format(l1, 's', l2))
                        new_arr.append('{}{}{}'.format(l1, 'z', l2))
                        new_arr.append('{}{}{}'.format(l1, 'x', l2))
    return new_arr


def make_punctuator(words, replace):
    """Insert hyphens, dots, or a given punctuation mark into words.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _replace(words, replace, replace_type='.'):
        return [word.replace(
            replace, replace + replace_type) for word in words]

    hyphens = _replace(words, replace, replace_type='-')
    periods = _replace(words, replace)
    return hyphens + periods
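
# Example usage (illustrative):
#
#     >>> make_punctuator(['delicious'], 'i')
#     ['deli-ci-ous', 'deli.ci.ous']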


def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    new_words = []
    new_words += make_punctuator(words, 'a')
    new_words += make_punctuator(words, 'e')
    new_words += make_punctuator(words, 'i')
    new_words += make_punctuator(words, 'o')
    new_words += make_punctuator(words, 'u')
    return new_words


def make_vowelify(words):
    """Chop off the last letter, ala nautical -> nautica.

    Only applies when the rest of the word still contains a vowel.
    """
    new_arr = []
    for word in words:
        if re.search(_regexes['all_vowels'], word[:-2]):
            new_arr.append(word[:-1])
    return new_arr
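
# Example usage (illustrative):
#
#     >>> make_vowelify(['nautical'])
#     ['nautica']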


def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results."""
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # These seem to have sucked in practice.
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # Number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at the end of a word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    new_arr = []
    for word in words:
        for tokens in token_groups:
            new_arr.append(word.replace(*tokens))
    return normalization.uniquify(new_arr)
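
# Example usage (illustrative; assumes normalization.uniquify only
# de-duplicates the list):
#
#     >>> 'nize' in make_misspelling(['nice'])
#     True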


def _pig_latinize(word, postfix='ay'):
    """Generate standard pig latin style, with optional postfix argument."""
    # Common postfixes: ['ay', 'yay', 'way']
    if not isinstance(postfix, str):
        raise TypeError('Must use a string for postfix.')

    piggified = None

    vowel_re = re.compile(r'(a|e|i|o|u)')
    first_letter = word[0:1]

    # Clean up non-letters.
    word = re.sub(r'[^a-zA-Z]', '', word)

    if vowel_re.match(first_letter):
        piggified = word + 'way'
    else:
        piggified = ''.join([word[1: len(word)], first_letter, postfix])
    return piggified


def pig_latinize(words, postfix='ay'):
    """Pig latinize a set of words.

    Args:
        words (list): A list of words.
        postfix (str, optional): A postfix to use. Default is `ay`.

    Returns:
        words (list): The updated list.

    """
    return [_pig_latinize(word, postfix=postfix) for word in words]
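
# Example usage (illustrative):
#
#     >>> pig_latinize(['namebot'])
#     ['amebotnay']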


def acronym_lastname(description, lastname):
    """Create an acronym plus the last name.

    Inspiration: ALFA Romeo.
    """
    desc = ''.join([word[0].upper() for word
                    in normalization.remove_stop_words(description.split(' '))])
    return '{} {}'.format(desc, lastname)
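
# Example usage (illustrative; assumes none of the description words are
# filtered out by normalization.remove_stop_words):
#
#     >>> acronym_lastname('awesome legal firm', 'Adams')
#     'ALF Adams'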


def get_descriptors(words):
    """Group words by their NLTK part-of-speech descriptors.

    Use NLTK to first grab tokens by looping through words,
    then tag part-of-speech (in isolation)
    and provide a dictionary with a list of each type
    for later retrieval and usage.
    """
    descriptors = defaultdict(list)
    tokens = nltk.word_tokenize(' '.join(words))
    parts = nltk.pos_tag(tokens)
    # Then, push each word into the list for its matching tag.
    for part in parts:
        descriptors[part[1]].append(part[0])
    return descriptors


def _add_pos_subtypes(nouns, verbs):
    """Combine alternating verbs and nouns into a new list.

    Args:
        nouns (list) - List of nouns, noun phrases, etc...
        verbs (list) - List of verbs, verb phrases, etc...

    Returns:
        words (list) - The newly combined list
    """
    words = []
    try:
        for noun in nouns:
            for verb in verbs:
                words.append('{} {}'.format(noun, verb))
                words.append('{} {}'.format(verb, noun))
    except KeyError:
        pass
    return words


def _create_pos_subtypes(words):
    """Check part-of-speech tags for a noun-phrase, adding combinations if so.

    If it exists, add combinations with noun-phrase + verb-phrase,
    noun-phrase + verb, and noun-phrase + adverb,
    for each pos type that exists.

    Args:
        words (list) - List of verbs, verb phrases, etc...

    Returns:
        new_words (list) - The newly combined list
    """
    new_words = []
    types = words.keys()
    if 'NNP' in types:
        if 'VBP' in types:
            new_words += _add_pos_subtypes(words['NNP'], words['VBP'])
        if 'VB' in types:
            new_words += _add_pos_subtypes(words['NNP'], words['VB'])
        if 'RB' in types:
            new_words += _add_pos_subtypes(words['NNP'], words['RB'])
    return new_words


def make_descriptors(words):
    """Make descriptor names.

    Based on a verb + noun or adjective + noun combination.
    Examples:
        - Pop Cap
        - Big Fish
        - Red Fin
        - Cold Water (grill), etc...
    Combines VBP/VB/RB with NN/NNS.
    """
    return list(set(_create_pos_subtypes(words)))


def all_prefix_first_vowel(word, letters=list(ascii_uppercase)):
    """Find the first vowel in a word and prefix it with consonants.

    Args:
        word (str) - the word to update
        letters (list) - the letters to use for prefixing.

    Returns:
        words (list) - All prefixed words

    """
    re_vowels = re.compile(r'[aeiouy]')
    matches = re.search(re_vowels, word)
    if matches is None:
        return [word]
    words = []
    vowels = ['A', 'E', 'I', 'O', 'U']
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            # If the beginning letter is a vowel, don't offset the index.
            if first_match == 0:
                words.append('{}{}'.format(letter, word))
            else:
                words.append('{}{}'.format(letter, word[first_match:]))
    return words
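
# Example usage (illustrative; the default letters are A-Z in order):
#
#     >>> all_prefix_first_vowel('dog')[:3]
#     ['Bog', 'Cog', 'Dog']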


def recycle(words, func, times=2):
    """Run a set of words applied to a function repeatedly.

    It will re-run with the last output as the new input.
    `words` must be a list, and `func` must return a list.
    """
    if times > 0:
        return recycle(func(words), func, times - 1)
    return words
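
# Example usage (illustrative; any list-in, list-out function works):
#
#     >>> recycle(['namebot'], pig_latinize)
#     ['amebotnayway']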


def backronym(acronym, theme, max_attempts=10):
    """Attempt to generate a backronym based on a given acronym and theme.

    Args:
        acronym (str): The starting acronym.
        theme (str): The seed word to base other words off of.
        max_attempts (int, optional): The number of attempts before failing.

    Returns:
        dict: The result dictionary. If a backronym was successfully
            generated, the `success` key will be True, otherwise False.
    """
    ret = {
        'acronym': '.'.join(list(acronym)).upper(),
        'backronym': '',
        'words': [],
        'success_ratio': 0.0,
        'success': False
    }
    if not acronym or not theme:
        return ret
    all_words = set()
    words = nlp._get_synset_words(theme)
    _backronym = []
    sdict = {}
    acronym = acronym.lower()
    # Add words if they contain the same first letter
    # as any in the given acronym. Stop once the acronym is
    # covered, or the maximum number of attempts is exhausted.
    cur_step = 0
    while len(_backronym) < len(acronym) and cur_step < max_attempts:
        all_words.update(words)
        for word in words:
            if word[0].lower() in acronym:
                if '_' in word:
                    # Don't add multi-word strings, but don't leave it blank.
                    _backronym.append(word[0])
                else:
                    _backronym.append(word)
        # Sort the words in order of the acronym's
        # letters by re-arranging indices.
        sdict = {}
        for word in _backronym:
            try:
                index = acronym.index(word[0].lower())
                sdict[index] = word
            except (IndexError, ValueError):
                continue
        cur_step += 1
        # Refresh words for the next attempt.
        words = nlp._get_synset_words(theme)
        # Try again if no words existed.
        if not words:
            continue
        # Get a new theme, similar to the originating theme.
        theme = words[0]
    # Assemble the stored words in acronym-letter order.
    vals = [sdict[key] for key in sorted(sdict)]
    ret.update({
        'backronym': ' '.join(vals).upper(),
        'words': vals,
        'success_ratio': float(len(vals)) / float(len(acronym)),
        'success': len(vals) == len(acronym)
    })
    return ret
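
# Example usage (illustrative; real output depends on the WordNet data pulled
# in by nlp._get_synset_words, so only the return shape is shown):
#
#     >>> res = backronym('gnu', 'software')
#     >>> sorted(res.keys())
#     ['acronym', 'backronym', 'success', 'success_ratio', 'words']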


def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words".
    """
    for technique in data['words']:
        data['words'][technique] = normalization.uniquify(
            normalization.remove_odd_sounding_words(
                normalization.clean_sort(
                    data['words'][technique])))
    return data


def generate_all_techniques(words):
    """Generate all techniques across the library in one place."""
    data = {
        'words': {
            'alliterations': make_name_alliteration(words),
            'portmanteau': make_portmanteau_default_vowel(words),
            'vowels': make_vowelify(words),
            'suffix': suffixify(words),
            'prefix': prefixify(words),
            'duplifix': duplifixify(words),
            'disfix': disfixify(words),
            'infix': infixify(words),
            'simulfix': simulfixify(words),
            'founder_product_name': make_founder_product_name(
                'Lindsey', 'Chris', 'Widgets'),
            'punctuator': make_punctuator_vowels(words),
            'name_abbreviation': make_name_abbreviation(words),
            'make_portmanteau_split': make_portmanteau_split(words),
            'forkerism': forkerism(words),
            'kniferism': kniferism(words),
            'spoonerism': spoonerism(words),
            'palindrome': palindromes(words),
            'reduplication_ablaut': reduplication_ablaut(words),
            'misspelling': make_misspelling(words),
            'descriptors': make_descriptors(
                get_descriptors(words))
        }
    }
    return super_scrub(data)