Completed
Push — master ( 533d77...907c05 )
by Chris
01:19
created

namebot._get_synset_words()   A

Complexity

Conditions 4

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 15
rs 9.2
1
"""Functions that use Natural Language Processing.
2
3
Word relationships found (via NLTK and other libraries)
4
to find and generate related words.
5
"""
6
7
8
from __future__ import absolute_import
9
10
import itertools
11
12
from nltk.corpus import (
0 ignored issues
show
Configuration introduced by
The import nltk.corpus could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
13
    verbnet,
14
    wordnet,
15
)
16
17
from . import normalization
18
19
20
def _get_synset_words(word):
    """Return a flat word list built from every relation found for ``word``.

    Thin convenience wrapper around ``get_synsets`` for a single seed word.

    Args:
        word (str): The seed word.

    Returns:
        words (list): The flattened related words, or an empty list when
            the lookup produced nothing for the word.
    """
    by_relation = get_synsets([word])[word]
    if not by_relation:
        return []
    # Empty (or missing) relation entries are dropped before flattening.
    non_empty = [group for group in by_relation.values() if group]
    return list(normalization.flatten(non_empty))
35
36
37
def print_all_synset_categories():
    """Collect every WordNet synset for part of speech ``'n'``.

    Despite the name, nothing is printed; the synsets are returned so they
    can be inspected for research purposes.

    Returns:
        categories (list): All synsets yielded by
            ``wordnet.all_synsets('n')``.
    """
    return list(wordnet.all_synsets('n'))
47
48
49
def _get_lemma_names(sub_synset, use_definitions=False):
    """Collect lemma names from the synsets returned by a relation accessor.

    Args:
        sub_synset (callable): Zero-argument callable (for example the bound
            ``synset.hyponyms`` method) returning the related synsets.
        use_definitions (bool, optional): When true, each synset's
            definition is split on whitespace and appended as one nested
            list right after that synset's lemma names.

    Returns:
        list: The lemma names in iteration order; empty when the accessor
            returns nothing.
    """
    results = []
    # Call the accessor only once; a falsy result (None/empty) yields [].
    for related in sub_synset() or ():
        # ``lemma_names`` and ``definition`` may be methods or plain
        # attributes depending on the NLTK version, so accept both forms.
        names = related.lemma_names
        results += names() if callable(names) else names
        if use_definitions:
            definition = related.definition
            if callable(definition):
                definition = definition()
            results.append(definition.split())
    return results
60
61
62
def get_hyponyms(synset, use_definitions=False):
    """Collect lemma names for the synset's hyponyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.hyponyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
73
74
75
def get_inst_hyponyms(synset, use_definitions=False):
    """Collect lemma names for the synset's instance hyponyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.instance_hyponyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
87
88
89
def get_member_meronyms(synset, use_definitions=False):
    """Collect lemma names for the synset's member meronyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.member_meronyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
101
102
103
def get_substance_meronyms(synset, use_definitions=False):
    """Collect lemma names for the synset's substance meronyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.substance_meronyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
115
116
117
def get_part_meronyms(synset, use_definitions=False):
    """Collect lemma names for the synset's part meronyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.part_meronyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
129
130
131
def get_substance_holoynms(synset, use_definitions=False):
    """Collect lemma names for the synset's substance holonyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.substance_holonyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
143
144
145
def get_topic_domains(synset, use_definitions=False):
    """Collect lemma names for the synset's topic domains.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.topic_domains
    return _get_lemma_names(relation, use_definitions=use_definitions)
157
158
159
def get_region_domains(synset, use_definitions=False):
    """Collect lemma names for the synset's region domains.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.region_domains
    return _get_lemma_names(relation, use_definitions=use_definitions)
171
172
173
def get_usage_domains(synset, use_definitions=False):
    """Collect lemma names for the synset's usage domains.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.usage_domains
    return _get_lemma_names(relation, use_definitions=use_definitions)
185
186
187
def get_attributes(synset, use_definitions=False):
    """Collect lemma names for the synset's attributes.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.attributes
    return _get_lemma_names(relation, use_definitions=use_definitions)
199
200
201
def get_entailments(synset, use_definitions=False):
    """Collect lemma names for the synset's entailments.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.entailments
    return _get_lemma_names(relation, use_definitions=use_definitions)
213
214
215
def get_causes(synset, use_definitions=False):
    """Extract causes from a synset.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Extract definitions from the synset.

    Returns:
        list: The results list; empty when the synset has no causes.
    """
    # No pre-check on ``synset.causes()``: the helper already yields an
    # empty list for an empty relation, so this always returns a list
    # (never ``None``), matching the other relation wrappers.
    return _get_lemma_names(
        synset.causes, use_definitions=use_definitions)
228
229
230
def get_also_sees(synset, use_definitions=False):
    """Collect lemma names for the synset's also-sees.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.also_sees
    return _get_lemma_names(relation, use_definitions=use_definitions)
242
243
244
def get_verb_groups(synset, use_definitions=False):
    """Collect lemma names for the synset's verb groups.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.verb_groups
    return _get_lemma_names(relation, use_definitions=use_definitions)
256
257
258
def get_similartos(synset, use_definitions=False):
    """Collect lemma names for the synset's similar-tos.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.similar_tos
    return _get_lemma_names(relation, use_definitions=use_definitions)
270
271
272
def get_member_holoynms(synset, use_definitions=False):
    """Collect lemma names for the synset's member holonyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.member_holonyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
284
285
286
def get_part_holoynms(synset, use_definitions=False):
    """Collect lemma names for the synset's part holonyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.part_holonyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
298
299
300
def get_instance_hypernyms(synset, use_definitions=False):
    """Collect lemma names for the synset's instance hypernyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.instance_hypernyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
312
313
314
def get_hypernyms(synset, use_definitions=False):
    """Collect lemma names for the synset's hypernyms.

    Args:
        synset (object): The synset instance.
        use_definitions (bool, optional): Forwarded to the lemma-name
            helper so definition words are included as well.

    Returns:
        list: The lemma names gathered for this relation.
    """
    relation = synset.hypernyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
326
327
328
def get_verb_lemmas(verbs):
    """Look up verbnet class ids for the given verbs and return their stems.

    The verbs are stemmed before the lookup to prevent empty results.

    Args:
        verbs (list) - The verbs to reference.

    Returns:
        lemmas (list) - The part of each matching class id before its
                        first ``-``, for all verbs combined
                        - these are not separated by verb.
    """
    collected = []
    for stemmed in normalization.stem_words(verbs):
        class_ids = verbnet.classids(lemma=stemmed)
        collected.extend(class_id.split('-')[0] for class_id in class_ids)
    return collected
345
346
347
def get_word_synsets(word):
    """Get all synsets for a word.

    The word is handed to WordNet as the interpreter's native ``str``:
    a Python 2 ``unicode`` value is UTF-8 encoded (as before), a Python 3
    ``bytes`` value is UTF-8 decoded, and a native ``str`` is passed
    through untouched instead of being encoded unconditionally.

    Args:
        word (str): The word to lookup.

    Returns:
        list: The synsets returned by ``wordnet.synsets`` for the word,
            queried with ``pos=None`` (no part-of-speech filter).
    """
    if not isinstance(word, str):
        # Python 2 ``unicode`` has ``encode``; Python 3 ``bytes`` does not.
        if hasattr(word, 'encode'):
            word = word.encode('utf-8')
        else:
            word = word.decode('utf-8')
    return wordnet.synsets(word, pos=None)
357
358
359
def get_synset_definitions(word):
    """Return the tokenized definition of every synset found for a word.

    Args:
        word (str): The word to lookup.

    Returns:
        definitions (list): One list of whitespace-split definition words
            per synset.
    """
    return [
        found.definition().split() for found in get_word_synsets(word)]
373
374
375
def get_synsets_definitions(words):
    """Return the synset definitions for each non-empty word.

    Args:
        words (list): The list of words; falsy entries are skipped.

    Returns:
        sets (list): One ``get_synset_definitions`` result per kept word,
            in input order.
    """
    collected = []
    for word in words:
        if not word:
            continue
        collected.append(get_synset_definitions(word))
    return collected
385
386
387
def _clean_related_words(related):
    """Flatten one relation's entries and run the normalization pipeline.

    Args:
        related (list): Entries for a single relation type; each entry is
            either a word or a list of words (a lemma-name group or a
            split definition).

    Returns:
        list: The sorted result of the normalization pipeline.
    """
    flat = []
    for entry in related:
        # Keep plain strings whole: unpacking them (as ``chain(*...)``
        # would) yields single characters instead of words.
        if isinstance(entry, (list, tuple)):
            flat.extend(entry)
        else:
            flat.append(entry)
    return sorted(
        normalization.uniquify(
            normalization.clean_sort(
                normalization.remove_stop_words(
                    normalization.stem_words(
                        normalization.remove_bad_words(flat))))))


def get_synsets(words, use_definitions=False, clean=False):
    """Brute force loop on a synset ring to get all related words.

    You are expected to filter or remove any that are not relevant separately,
    if the resultant set is too long.
    The scoring module provides tools to filter based on pronunciation,
    but you can write your own and extend the functionality.

    Relation words are accumulated across every synset of a word (earlier
    synsets are no longer overwritten by later ones).

    Args:
        words (list): The list of words.
        use_definitions (bool, optional): Determine if definition words
            should also be extracted.
        clean (bool, optional): Determine if each relation's word list
            should be flattened, filtered, stemmed, de-duped and sorted.

    Returns:
        results (dict): Maps each input word to a dict keyed by relation
            name (``'synset_original'``, ``'hyponyms'``, ...) whose values
            are the word lists for that relation. Relation keys other than
            ``'synset_original'`` only appear when the word has synsets.
    """
    extractors = (
        # More Specific *nyms (deep)
        ('hyponyms', get_hyponyms),
        ('instance_hyponyms', get_inst_hyponyms),
        ('member_meronyms', get_member_meronyms),
        ('substance_meronyms', get_substance_meronyms),
        ('part_meronyms', get_part_meronyms),
        ('substance_holonyms', get_substance_holoynms),
        # More Generic *nyms (shallow)
        ('member_holonyms', get_member_holoynms),
        ('part_holonyms', get_part_holoynms),
        ('instance_hypernyms', get_instance_hypernyms),
        ('hypernyms', get_hypernyms),
        # Other types
        ('topic_domains', get_topic_domains),
        ('region_domains', get_region_domains),
        ('usage_domains', get_usage_domains),
        ('attributes', get_attributes),
        ('entailments', get_entailments),
        ('causes', get_causes),
        ('also_sees', get_also_sees),
        ('verb_groups', get_verb_groups),
        ('similar_tos', get_similartos),
    )
    results = {}

    for word in words:
        related = {'synset_original': []}

        for synset in get_word_synsets(word):
            # ``lemma_names`` may be a method or a plain attribute.
            if hasattr(synset.lemma_names, '__call__'):
                related['synset_original'].append(synset.lemma_names())
            else:
                related['synset_original'].append(synset.lemma_names)

            for relation, extract in extractors:
                found = extract(synset, use_definitions=use_definitions)
                # ``or []`` tolerates an extractor returning None.
                related.setdefault(relation, []).extend(found or [])

        if clean:
            # Clean each relation's words, not the dict's key names.
            for relation in list(related):
                related[relation] = _clean_related_words(related[relation])

        results[word] = related

    return results
481