1
|
|
|
"""Primary techniques for the core functionality of namebot.""" |
2
|
|
|
|
3
|
|
|
from __future__ import absolute_import |
4
|
|
|
from __future__ import division |
5
|
|
|
|
6
|
|
|
from random import choice |
7
|
|
|
from string import ascii_uppercase |
8
|
|
|
from collections import defaultdict |
9
|
|
|
import re |
10
|
|
|
import nltk |
11
|
|
|
|
12
|
|
|
from . import settings as namebot_settings |
13
|
|
|
from . import normalization |
14
|
|
|
|
15
|
|
|
|
16
|
|
|
# Module-level aliases for the configurable building blocks declared in
# the settings module; the techniques below read these rather than
# reaching into namebot_settings directly.
_prefixes = namebot_settings.PREFIXES
_suffixes = namebot_settings.SUFFIXES
_alphabet = namebot_settings.ALPHABET
_consonants = namebot_settings.CONSONANTS
_vowels = namebot_settings.VOWELS
_regexes = namebot_settings.regexes
22
|
|
|
|
23
|
|
|
|
24
|
|
|
def domainify(words, tld='com'):
    """Convert words into a domain format for testing domains.

    Args:
        words (list): List of words
        tld (str, optional): The TLD (top-level domain) to use.

    Returns:
        list: The modified list of words.
    """
    _words = []
    if tld.startswith('.'):
        tld = tld.replace('.', '')
    for word in words:
        if tld != '' and word.endswith(tld):
            # BUG FIX: only dot-separate the *trailing* occurrence of the
            # tld. The previous str.replace targeted the first occurrence
            # anywhere in the word (e.g. 'comcom' became '.comcom').
            word = '{}.{}'.format(word[:-len(tld)], tld)
        _words.append(word)
    return _words
42
|
|
|
|
43
|
|
|
|
44
|
|
|
def spoonerism(words):
    """Convert a list of words formatted with the spoonerism technique.

    The leading letters of each adjacent word pair are swapped,
    e.g. "First: [f]oo [b]ar => boo far".

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> spoonerism(['foo', 'bar'])
    >>> ['boo', 'far']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    swapped = []
    # Walk adjacent pairs; the final word has no successor to pair with.
    for idx in range(len(words) - 1):
        first, second = words[idx], words[idx + 1]
        try:
            swapped.append('{}{} {}{}'.format(
                second[0],    # 2nd word, 1st letter
                first[1:],    # 1st word, 2nd letter to end
                first[0],     # 1st word, 1st letter
                second[1:]))  # 2nd word, 2nd letter to end
        except IndexError:
            # An empty string has no first letter to swap; skip it.
            continue
    return swapped
70
|
|
|
|
71
|
|
|
|
72
|
|
|
def kniferism(words):
    """Convert a list of words formatted with the kniferism technique.

    The middle letters of each adjacent word pair are swapped,
    e.g. "Mid: f[o]o b[a]r => fao bor".

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> kniferism(['foo', 'bar'])
    >>> ['fao', 'bor']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx, first in enumerate(words):
        try:
            second = words[idx + 1]
            # Midpoint index of each word in the pair.
            mid_second = int(len(second) / 2)
            mid_first = int(len(first) / 2)
            results.append('{}{}{} {}{}{}'.format(
                first[:mid_first],
                second[mid_second],
                first[mid_first + 1:],
                second[:mid_second],
                first[mid_first],
                second[mid_second + 1:]))
        except IndexError:
            # Either there is no next word, or a word was empty.
            continue
    return results
102
|
|
|
|
103
|
|
|
|
104
|
|
|
def forkerism(words):
    """Convert a list of words formatted with the forkerism technique.

    The final letters of each adjacent word pair are swapped,
    e.g. "Last: fo[o] ba[r] => for bao".

    Args:
        words (list) - The list of words to operate on

    Returns:
        words (list) - The updated list of words

    >>> forkerism(['foo', 'bar'])
    >>> ['for', 'bao']
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    results = []
    for idx, first in enumerate(words):
        try:
            second = words[idx + 1]
            # Last letter of each word (raises IndexError on '').
            first_last = first[len(first) - 1]
            second_last = second[len(second) - 1]
            results.append('{}{} {}{}'.format(
                first[:-1],    # 1st word, minus its last letter
                second_last,   # 2nd word, last letter
                second[:-1],   # 2nd word, minus its last letter
                first_last))   # 1st word, last letter
        except IndexError:
            continue
    return results
135
|
|
|
|
136
|
|
|
|
137
|
|
|
def reduplication_ablaut(words, count=1, random=True, vowel='e'):
    """A technique to combine words and altering the vowels.

    e.g ch[i]t-ch[a]t, d[i]lly, d[a]lly.
    See http://phrases.org.uk/meanings/reduplication.html.
    """
    if len(words) < 2:
        raise ValueError('Need more than one word to combine')
    # When :random is truthy the :vowel argument is ignored and a
    # random vowel from settings is substituted instead.
    replacement = vowel if not random else choice(_vowels)
    pairs = []
    for word in words:
        ablauted = re.sub(r'a|e|i|o|u', replacement, word, count=count)
        # Only keep pairs where the substitution actually changed
        # something.
        if ablauted != word:
            pairs.append('{} {}'.format(word, ablauted))
    return pairs
153
|
|
|
|
154
|
|
|
|
155
|
|
|
def prefixify(words):
    """Apply a prefix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for prefix in _prefixes:
            first_prefix_no_vowel = re.search(
                _regexes['no_vowels'], word[0])
            second_prefix_no_vowel = re.search(
                _regexes['no_vowels'], prefix[0])
            if first_prefix_no_vowel or second_prefix_no_vowel:
                # if there's a vowel at the end of
                # prefix but not at the beginning
                # of the word (or vice versa)
                # BUG FIX: the original patterns r'a|e|i|o|u' and
                # r'^a|e|i|o|u' relied on alternation in which only the
                # first branch was anchored; explicit character classes
                # express the single-character test unambiguously.
                vowel_beginning = re.search(r'[aeiou]', prefix[-1:])
                vowel_end = re.search(r'^[aeiou]', word[:1])
                if vowel_beginning or vowel_end:
                    new_arr.append('{}{}'.format(prefix, word))
    return new_arr
182
|
|
|
|
183
|
|
|
|
184
|
|
|
def suffixify(words):
    """Apply a suffix technique to a set of words.

    Args:
        words (list) - The list of words to operate on.
            (e.g -> chard + ard = chardard -> chard)

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for suffix in _suffixes:
            prefix_start_vowel = re.search(_regexes['all_vowels'], word[0])
            suffix_start_vowel = re.search(_regexes['all_vowels'], suffix[0])
            if prefix_start_vowel or suffix_start_vowel:
                # BUG FIX: string comparisons below originally used
                # `is` / `is not`, which test object identity and only
                # worked by accident of CPython string interning.
                # Equality is the correct, portable comparison.
                if suffix == 'ify':
                    if word[-1] == 'e':
                        if word[-2] != 'i':
                            new_arr.append('{}{}'.format(word[:-2], suffix))
                        else:
                            new_arr.append('{}{}'.format(word[:-1], suffix))
                    new_arr.append(word + suffix)
                else:
                    new_arr.append(word + suffix)
    return new_arr
212
|
|
|
|
213
|
|
|
|
214
|
|
|
def duplifixify(words):
    """Apply a duplifix technique to a set of words (e.g: teeny weeny, etc...).

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    for word in words:
        if not word:
            continue
        for letter in _alphabet:
            # check if the first letter is NOT the same as the second letter,
            # or the combined word is not a duplicate of the first.
            duplicate_word = '{}{}'.format(letter, word[1:]) == word
            # BUG FIX: `word[0] is not letter` compared object identity;
            # single-character strings happen to be interned in CPython,
            # but `!=` is the correct, portable comparison.
            if word[0] != letter and not duplicate_word:
                new_arr.append('{} {}{}'.format(word, letter, word[1:]))
    return new_arr
234
|
|
|
|
235
|
|
|
|
236
|
|
|
def disfixify(words, replaces=1):
    """Apply a disfix technique to a set of words.

    Disfixing is done by removing the first set of vowel-consonant pairs.

    Args:
        words (list) - The list of words to operate on.
        replaces (int, optional): Number of replacements
                                  to make on this string.

    Returns:
        new_arr (list): the updated *fixed words
    """
    # Any letter, followed by exactly one vowel and one consonant.
    vc_combo = r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}'
    results = []
    for word in words:
        combos_found = re.findall(vc_combo, word)
        # Only strip combos when more than one exists, so a word is
        # never reduced past recognition; otherwise keep it as-is.
        if len(combos_found) > 1:
            results.append(re.sub(vc_combo, '', word, replaces))
        else:
            results.append(word)
    return results
257
|
|
|
|
258
|
|
|
|
259
|
|
|
def infixify(words):
    """Apply a infix technique to a set of words.

    Adds all consonant+vowel pairs to all inner matching vowel+consonant pairs
    of a word, giving all combinations for each word.

    Args:
        words (list) - The list of words to operate on.

    Returns:
        new_arr (list): the updated *fixed words
    """
    new_arr = []
    # Matches letter + vowel + consonant + vowel + consonant — the
    # "inner pair" that gets split in half and fused with an infix.
    vc_combo_pair = re.compile(
        r'[a-zA-Z][aeiou]{1}[qwrtypsdfghjklzxcvbnm]{1}[aeiou]'
        '{1}[qwrtypsdfghjklzxcvbnm]{1}')
    for word in words:
        matches = re.findall(vc_combo_pair, word)
        if matches:
            for match in matches:
                # CV_TL_PAIRS comes from settings; presumably
                # consonant+vowel two-letter pairs — confirm there.
                for infix_pair in namebot_settings.CV_TL_PAIRS:
                    # Get midpoint of this string.
                    mid = len(match) // 2
                    # Get the left and right substrings to join with.
                    first, second = match[0:mid], match[mid:]
                    # Check if the infix_pair is the same as start, or end.
                    bad_matches = [
                        # Duplicates joined is bad.
                        infix_pair == first, infix_pair == second,
                        # Matching letters on start/end joining substrings
                        # is bad.
                        first[-1] == infix_pair[0],
                        # Matching letters on end/start joining substrings
                        # is also bad.
                        first[0] == infix_pair[-1],
                    ]
                    # Skip bad 'fusings'
                    if any(bad_matches):
                        continue
                    # Splice the infix between the two halves and swap it
                    # into the original word.
                    replacer = '{}{}{}'.format(first, infix_pair, second)
                    new_arr.append(word.replace(match, replacer))
        else:
            # No inner pair found; keep the word unchanged.
            new_arr.append(word)
    return new_arr
303
|
|
|
|
304
|
|
|
|
305
|
|
|
def simulfixify(words, pairs=None, max=5):
    """Generate simulfixed words.

    Args:
        words (list) - List of words to operate on.
        pairs (list, optional) - Simulfix pairs to use for each word.
                                 If not specified, these will be generated
                                 randomly as vowel + consonant strings.
        max (int, optional): The number of simulfix pairs to generate
                             (if pairs is not specified.)

    Returns:
        results (list) - The simulfix version of each word,
                         for each simulfix pair.
    """
    if pairs is None:
        # No pairs given: build random vowel+consonant combos.
        pairs = ['{}{}'.format(choice(_vowels), choice(_consonants))
                 for _ in range(max)]
    combined = []
    for word in words:
        midpoint = len(word) // 2
        for pair in pairs:
            # Inject the pair into the middle of the word.
            combined.append(word[:midpoint] + pair + word[midpoint:])
    return combined
330
|
|
|
|
331
|
|
|
|
332
|
|
|
def palindrome(word):
    """Create a palindrome from a word.

    Args:
        word (str): The word.

    Returns:
        str: The updated palindrome.
    """
    # Mirror the word by appending its own reverse.
    return word + word[::-1]
342
|
|
|
|
343
|
|
|
|
344
|
|
|
def palindromes(words):
    """Convert a list of words into their palindromic form.

    Args:
        words (list): The words.

    Returns:
        list: The list of palindromes.
    """
    results = []
    for word in words:
        # Mirror each word by appending its reverse.
        results.append('{}{}'.format(word, word[::-1]))
    return results
354
|
|
|
|
355
|
|
|
|
356
|
|
|
def make_founder_product_name(founder1, founder2, product):
    """Get the name of two people forming a company and combine it."""
    # Initial of each founder, joined ampersand-style with the product.
    first_initial = founder1[0].upper()
    second_initial = founder2[0].upper()
    return '{} & {} {}'.format(first_initial, second_initial, product)
362
|
|
|
|
363
|
|
|
|
364
|
|
|
def make_name_alliteration(word_array, divider=' '):
    """Make an alliteration with a set of words, if applicable.

    Examples:
    java jacket
    singing sally
    earth engines
    ...etc

    1. Loop through a given array of words
    2. group by words with the same first letter
    3. combine them and return to new array

    Args:
        word_array (list) - The words to operate on.
        divider (str, optional) - String used to join a matched pair.

    Returns:
        new_arr (list) - All alliterative pairings.
    """
    new_arr = []
    word_array = sorted(word_array)

    for word1 in word_array:
        for word2 in word_array:
            # BUG FIX: the original used `is` / `is not` on the
            # one-letter slices and the words themselves, which compares
            # object identity and only worked due to CPython's string
            # interning. Compare by value instead.
            if word1[:1] == word2[:1] and word1 != word2:
                new_arr.append(word1 + divider + word2)
    return new_arr
385
|
|
|
|
386
|
|
|
|
387
|
|
|
def make_name_abbreviation(words):
    """Will make some kind of company acronym.

    eg: BASF, AT&T, A&W
    Returns a single string of the new word combined.
    """
    # Uppercase first letter of every word, concatenated.
    initials = [word[:1].upper() for word in words]
    return ''.join(initials)
394
|
|
|
|
395
|
|
|
|
396
|
|
|
def make_vowel(words, vowel_type, vowel_index):
    """Primary for all Portmanteau generators.

    This creates the portmanteau based on :vowel_index, and :vowel_type.

    The algorithm works as following:

    It looks for the first occurrence of a specified vowel in the first word,
    then gets the matching occurrence (if any) of the second word,
    then determines which should be first or second position, based on
    the ratio of letters (for each word) divided by the position of the vowel
    in question (e.g. c[a]t (2/3) vs. cr[a]te (3/5)).

    The higher number is ordered first, and the two words are then fused
    together by the single matching vowel.
    """
    new_arr = []
    for i in words:
        for j in words:
            is_match_i = re.search(vowel_type, i)
            is_match_j = re.search(vowel_type, j)
            # BUG FIX: the comparisons in this function originally used
            # `is` / `is not`, relying on CPython small-int caching and
            # string interning; value comparisons are the portable form.
            if i != j and is_match_i and is_match_j:
                # get the indices and lengths to use in finding the ratio
                pos_i = i.index(vowel_index)
                len_i = len(i)
                pos_j = j.index(vowel_index)
                len_j = len(j)

                # If starting index is 0,
                # add 1 to it so we're not dividing by zero
                if pos_i == 0:
                    pos_i = 1
                if pos_j == 0:
                    pos_j = 1

                # Decide which word should be the
                # prefix and which should be suffix
                if round(pos_i / len_i) > round(pos_j / len_j):
                    p = i[0: pos_i + 1]
                    p2 = j[pos_j: len(j)]
                    if len(p) + len(p2) > 2:
                        if re.search(
                            _regexes['all_vowels'], p) or re.search(
                                _regexes['all_vowels'], p2):
                            # Drop a doubled letter at the fuse point.
                            if p[-1] == p2[0]:
                                new_arr.append(p[:-1] + p2)
                            else:
                                new_arr.append(p + p2)
    return new_arr
445
|
|
|
|
446
|
|
|
|
447
|
|
|
def make_portmanteau_default_vowel(words):
    """Make a portmanteau based on vowel matches.

    E.g. (ala Brad+Angelina = Brangelina)
    Only matches for second to last letter
    in first word and matching vowel in second word.

    This defers to the make_vowel function for all the internal
    magic, but is a helper in that it provides all types of vowel
    combinations in one function.
    """
    combos = []
    # One make_vowel pass per vowel; each pattern is the literal
    # equivalent of the original 'a{1}', 'e{1}', ... regexes.
    for vowel in ('a', 'e', 'i', 'o', 'u'):
        pattern = re.compile('{}{{1}}'.format(vowel))
        combos += make_vowel(words, pattern, vowel)
    return combos
471
|
|
|
|
472
|
|
|
|
473
|
|
|
def make_portmanteau_split(words):
    """Make a portmeanteau, split by vowel/consonant combos.

    Based on the word formation of nikon: [ni]pp[on] go[k]aku,
    which is comprised of Nippon + Gokaku.

    We get the first C+V in the first word,
    then last V+C in the first word,
    then all C in the second word.
    """
    new_arr = []
    for i in words:
        for j in words:
            if i is not j:
                # BUG FIX: the original classes [^a|e|i|o|u{1}] wrongly
                # included the literal characters '|', '{', '1', '}' --
                # alternation and repetition have no meaning inside a
                # character class. Plain vowel classes express the intent.
                l1 = re.search(r'[^aeiou]+[aeiou]', i)
                l2 = re.search(r'[aeiou]+[^aeiou]$', j)
                if i and l1 and l2:
                    # Third letter used for
                    # consonant middle splits only
                    l3 = re.split(r'[aeiou]', i)
                    l1 = l1.group(0)
                    l2 = l2.group(0)
                    if l3 and len(l3) > 0:
                        for v in l3:
                            new_arr.append(l1 + v + l2)
                    else:
                        new_arr.append('{}{}{}'.format(l1, 't', l2))
                        new_arr.append('{}{}{}'.format(l1, 's', l2))
                        new_arr.append('{}{}{}'.format(l1, 'z', l2))
                        new_arr.append('{}{}{}'.format(l1, 'x', l2))
    return new_arr
504
|
|
|
|
505
|
|
|
|
506
|
|
|
def make_punctuator(words, replace):
    """Put some hyphens or dots, or a given punctutation.

    Works via :replace in the word, but only around vowels ala "del.ic.ious"
    """
    def _punctuate(symbol):
        # Insert the symbol directly after every :replace occurrence.
        return [word.replace(replace, replace + symbol) for word in words]

    # Hyphenated variants first, then dotted variants.
    return _punctuate('-') + _punctuate('.')
518
|
|
|
|
519
|
|
|
|
520
|
|
|
def make_punctuator_vowels(words):
    """Helper function that combines all possible combinations for vowels."""
    combined = []
    # Punctuate around every vowel in turn, accumulating all results.
    for vowel in ('a', 'e', 'i', 'o', 'u'):
        combined += make_punctuator(words, vowel)
    return combined
529
|
|
|
|
530
|
|
|
|
531
|
|
|
def make_vowelify(words):
    """Chop off consonant ala nautica if second to last letter is a vowel."""
    # Keep only words with a vowel before the final two letters,
    # dropping each kept word's last character.
    return [word[:-1] for word in words
            if re.search(_regexes['all_vowels'], word[:-2])]
538
|
|
|
|
539
|
|
|
|
540
|
|
|
def make_misspelling(words):
    """Misspell a word in numerous ways, to create interesting results.

    Args:
        words (list): The list of words to operate on.

    Returns:
        list: The de-duplicated misspelled variants, one per word
        per (token, replacement) pair below.
    """
    # Each entry is (token, replacement); a variant is generated per
    # word per pair via str.replace (all occurrences are replaced).
    token_groups = (
        ('ics', 'ix'),
        ('ph', 'f'),
        ('kew', 'cue'),
        ('f', 'ph'),
        ('o', 'ough'),
        # these seem to have
        # sucked in practice
        ('o', 'off'),
        ('ow', 'o'),
        ('x', 'ecks'),
        ('za', 'xa'),
        ('xa', 'za'),
        ('ze', 'xe'),
        ('xe', 'ze'),
        ('zi', 'xi'),
        ('xi', 'zi'),
        ('zo', 'xo'),
        ('xo', 'zo'),
        ('zu', 'xu'),
        ('xu', 'zu'),
        # number based
        ('one', '1'),
        ('1', 'one'),
        ('two', '2'),
        ('2', 'two'),
        ('three', '3'),
        ('3', 'three'),
        ('four', '4'),
        ('4', 'four'),
        ('five', '5'),
        ('5', 'five'),
        ('six', '6'),
        ('6', 'six'),
        ('seven', '7'),
        ('7', 'seven'),
        ('eight', '8'),
        ('8', 'eight'),
        ('nine', '9'),
        ('9', 'nine'),
        ('ten', '10'),
        ('10', 'ten'),
        ('ecks', 'x'),
        ('spir', 'speer'),
        ('speer', 'spir'),
        ('x', 'ex'),
        ('on', 'awn'),
        ('ow', 'owoo'),
        ('awn', 'on'),
        ('awf', 'off'),
        ('s', 'z'),
        ('ce', 'ze'),
        ('ss', 'zz'),
        ('ku', 'koo'),
        ('trate', 'trait'),
        ('trait', 'trate'),
        ('ance', 'anz'),
        ('il', 'yll'),
        ('ice', 'ize'),
        ('chr', 'kr'),
        # These should only be at end of word!
        ('er', 'r'),
        ('lee', 'ly'),
    )
    new_arr = []
    for word in words:
        for tokens in token_groups:
            new_arr.append(word.replace(*tokens))
    # Words without a matching token pass through unchanged, so
    # de-duplicate before returning.
    return normalization.uniquify(new_arr)
611
|
|
|
|
612
|
|
|
|
613
|
|
|
def _pig_latinize(word, postfix='ay'): |
614
|
|
|
"""Generate standard pig latin style, with optional postfix argument.""" |
615
|
|
|
# Common postfixes: ['ay', 'yay', 'way'] |
616
|
|
|
if not type(postfix) is str: |
617
|
|
|
raise TypeError('Must use a string for postfix.') |
618
|
|
|
|
619
|
|
|
piggified = None |
620
|
|
|
|
621
|
|
|
vowel_re = re.compile(r'(a|e|i|o|u)') |
622
|
|
|
first_letter = word[0:1] |
623
|
|
|
|
624
|
|
|
# clean up non letters |
625
|
|
|
word = word.replace(r'[^a-zA-Z]', '') |
626
|
|
|
|
627
|
|
|
if vowel_re.match(first_letter): |
628
|
|
|
piggified = word + 'way' |
629
|
|
|
else: |
630
|
|
|
piggified = ''.join([word[1: len(word)], first_letter, postfix]) |
631
|
|
|
return piggified |
632
|
|
|
|
633
|
|
|
|
634
|
|
|
def pig_latinize(words, postfix='ay'):
    """Pig latinize a set of words.

    Args:
        words (list): A list of words.
        postfix (str, optional): A postfix to use. Default is `ay`.

    Returns:
        words (list): The updated list.
    """
    latinized = []
    for word in words:
        latinized.append(_pig_latinize(word, postfix=postfix))
    return latinized
646
|
|
|
|
647
|
|
|
|
648
|
|
|
def acronym_lastname(description, lastname):
    """Create an acronym plus the last name.

    Inspiration: ALFA Romeo.
    """
    # Acronym of the description's significant (non stop-) words.
    significant = normalization.remove_stop_words(description.split(' '))
    acronym = ''.join([word[0].upper() for word in significant])
    return '{} {}'.format(acronym, lastname)
656
|
|
|
|
657
|
|
|
|
658
|
|
|
def get_descriptors(words):
    """Group words by their NLTK part-of-speech descriptors.

    Use NLTK to first grab tokens by looping through words,
    then tag part-of-speech (in isolation)
    and provide a dictionary with a list of each type
    for later retrieval and usage.
    """
    grouped = defaultdict(list)
    tokens = nltk.word_tokenize(' '.join(words))
    tagged = nltk.pos_tag(tokens)
    # Then, push each word into the bucket matching its tag.
    for token, tag in tagged:
        grouped[tag].append(token)
    return grouped
673
|
|
|
|
674
|
|
|
|
675
|
|
|
def _add_pos_subtypes(nouns, verbs): |
676
|
|
|
"""Combine alternating verbs and nouns into a new list. |
677
|
|
|
|
678
|
|
|
Args: |
679
|
|
|
nouns (list) - List of nouns, noun phrases, etc... |
680
|
|
|
verbs (list) - List of verbs, verb phrases, etc... |
681
|
|
|
|
682
|
|
|
Returns: |
683
|
|
|
words (list) - The newly combined list |
684
|
|
|
""" |
685
|
|
|
words = [] |
686
|
|
|
try: |
687
|
|
|
for noun in nouns: |
688
|
|
|
for verb in verbs: |
689
|
|
|
words.append('{} {}'.format(noun, verb)) |
690
|
|
|
words.append('{} {}'.format(verb, noun)) |
691
|
|
|
except KeyError: |
692
|
|
|
pass |
693
|
|
|
return words |
694
|
|
|
|
695
|
|
|
|
696
|
|
|
def _create_pos_subtypes(words):
    """Check part-of-speech tags for a noun-phrase, adding combinations if so.

    If it exists, add combinations with noun-phrase + verb-phrase,
    noun-phrase + verb, and noun-phrase + adverb,
    for each pos type that exists.

    Args:
        words (list) - List of verbs, verb phrases, etc...

    Returns:
        new_words (list) - The newly combined list
    """
    new_words = []
    if 'NNP' in words:
        # Pair the proper nouns against each verb/adverb group present,
        # preserving the original VBP, VB, RB ordering.
        for pos in ('VBP', 'VB', 'RB'):
            if pos in words:
                new_words += _add_pos_subtypes(words['NNP'], words[pos])
    return new_words
719
|
|
|
|
720
|
|
|
|
721
|
|
|
def make_descriptors(words):
    """Make descriptor names.

    Based from a verb + noun, adjective + noun combination.
    Examples:
        -Pop Cap,
        -Big Fish,
        -Red Fin,
        -Cold Water (grill), etc...
    Combines VBP/VB/RB, with NN/NNS
    """
    # De-duplicate the combinations before returning.
    combos = _create_pos_subtypes(words)
    return list(set(combos))
733
|
|
|
|
734
|
|
|
|
735
|
|
|
def all_prefix_first_vowel(word, letters=None):
    """Find the first vowel in a word and prefixes with consonants.

    Args:
        word (str) - the word to update
        letters (list) - the letters to use for prefixing.
            Defaults to all uppercase ASCII letters.

    Returns:
        words (list) - All prefixed words

    """
    # BUG FIX: the original signature used the mutable default
    # `letters=list(ascii_uppercase)`; build the default per call instead.
    if letters is None:
        letters = list(ascii_uppercase)
    re_vowels = re.compile(r'[aeiouy]')
    matches = re.search(re_vowels, word)
    if matches is None:
        return [word]
    words = []
    vowels = ['A', 'E', 'I', 'O', 'U']
    first_match = matches.start(0)
    for letter in letters:
        if letter not in vowels:
            # If beginning letter is a vowel, don't offset the index
            if first_match == 0:
                words.append('{}{}'.format(letter, word))
            else:
                words.append('{}{}'.format(letter, word[first_match:]))
    return words
761
|
|
|
|
762
|
|
|
|
763
|
|
|
def recycle(words, func, times=2):
    """Run a set of words applied to a function repeatedly.

    It will re-run with the last output as the new input.
    `words` must be a list, and `func` must return a list.
    """
    # Iterative form of the original recursion: each pass feeds the
    # previous output back in; zero or negative :times is a no-op.
    for _ in range(times):
        words = func(words)
    return words
772
|
|
|
|
773
|
|
|
|
774
|
|
|
def super_scrub(data):
    """Run words through a comprehensive list of filtering functions.

    Expects a dictionary with key "words"
    """
    techniques = data['words']
    for technique in techniques:
        # Clean/sort first, then drop odd-sounding words, then dedupe.
        cleaned = normalization.clean_sort(techniques[technique])
        filtered = normalization.remove_odd_sounding_words(cleaned)
        techniques[technique] = normalization.uniquify(filtered)
    return data
785
|
|
|
|
786
|
|
|
|
787
|
|
|
def generate_all_techniques(words):
    """Generate all techniques across the library in one place.

    Args:
        words (list): The list of words to operate on.

    Returns:
        dict: The scrubbed results, keyed by technique name
        under 'words'.
    """
    data = {
        'words': {
            # BUG FIX: 'alliterations' appeared twice in the original
            # dict literal; the duplicate key silently overwrote the
            # first entry and has been removed.
            'alliterations': make_name_alliteration(words),
            'portmanteau': make_portmanteau_default_vowel(words),
            'vowels': make_vowelify(words),
            'suffix': suffixify(words),
            'prefix': prefixify(words),
            'duplifix': duplifixify(words),
            'disfix': disfixify(words),
            'infix': infixify(words),
            'simulfix': simulfixify(words),
            'founder_product_name': make_founder_product_name(
                'Lindsey', 'Chris', 'Widgets'),
            'punctuator': make_punctuator_vowels(words),
            'name_abbreviation': make_name_abbreviation(words),
            'make_portmanteau_split': make_portmanteau_split(words),
            'forkerism': forkerism(words),
            'kniferism': kniferism(words),
            'spoonerism': spoonerism(words),
            'palindrome': palindromes(words),
            'reduplication_ablaut': reduplication_ablaut(words),
            'misspelling': make_misspelling(words),
            'descriptors': make_descriptors(
                get_descriptors(words))
        }
    }
    return super_scrub(data)
817
|
|
|
|