get_synsets()   B
last analyzed

Complexity

Conditions 6

Size

Total Lines 91

Duplication

Lines 0
Ratio 0 %

Importance

Changes 6
Bugs 0 Features 1
Metric Value
cc 6
c 6
b 0
f 1
dl 0
loc 91
rs 7.2102

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""Functions that use Natural Language Processing.
2
3
Word relationships found (via NLTK and other libraries)
4
to find and generate related words.
5
"""
6
7
8
from __future__ import absolute_import
9
10
import itertools
11
12
from nltk.corpus import (
0 ignored issues
show
Configuration introduced by
The import nltk.corpus could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, so when installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
13
    verbnet,
14
    wordnet,
15
)
16
17
from . import normalization
18
19
20
def _get_synset_words(word):
    """Convenience wrapper around the heavier get_synsets function.

    :param word (str): The seed word.
    :rtype words (list): The list of NLTK words.
    """
    groups = get_synsets([word])[word]
    if not groups:
        return []
    # Keep only the relation lists that actually have content, then
    # flatten them into a single word list.
    nonempty = [group for group in groups.values() if group]
    return list(normalization.flatten(nonempty))
32
33
34
def print_all_synset_categories():
    """Return all noun synsets for research purposes.

    Despite the historical name, this collects and returns the synsets
    rather than printing them.

    :rtype categories (list): A list of all wordnet noun synsets.
    """
    return [category for category in wordnet.all_synsets('n')]
43
44
45
def _get_lemma_names(sub_synset, use_definitions=False):
46
    """Get lemma names."""
47
    results = []
48
    if sub_synset():
49
        for v in sub_synset():
50
            if hasattr(v.lemma_names, '__call__'):
51
                results += v.lemma_names()
52
            else:
53
                results += v.lemma_names
54
            if use_definitions:
55
                results.append(v.definition.split())
56
    return results
57
58
59
def get_hyponyms(synset, use_definitions=False):
    """Collect hyponym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.hyponyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
68
69
70
def get_inst_hyponyms(synset, use_definitions=False):
    """Collect instance-hyponym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.instance_hyponyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
80
81
82
def get_member_meronyms(synset, use_definitions=False):
    """Gather member-meronym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.member_meronyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
92
93
94
def get_substance_meronyms(synset, use_definitions=False):
    """Gather substance-meronym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.substance_meronyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
104
105
106
def get_part_meronyms(synset, use_definitions=False):
    """Gather part-meronym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.part_meronyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
116
117
118
def get_substance_holoynms(synset, use_definitions=False):
    """Gather substance-holonym lemma names for a synset.

    NOTE: the public name keeps its historical misspelling
    ("holoynms") so existing callers keep working; the underlying
    NLTK relation is ``substance_holonyms``.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.substance_holonyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
128
129
130
def get_topic_domains(synset, use_definitions=False):
    """Fetch topic-domain lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.topic_domains
    return _get_lemma_names(relation, use_definitions=use_definitions)
140
141
142
def get_region_domains(synset, use_definitions=False):
    """Fetch region-domain lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.region_domains
    return _get_lemma_names(relation, use_definitions=use_definitions)
152
153
154
def get_usage_domains(synset, use_definitions=False):
    """Fetch usage-domain lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.usage_domains
    return _get_lemma_names(relation, use_definitions=use_definitions)
164
165
166
def get_attributes(synset, use_definitions=False):
    """Fetch attribute lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.attributes
    return _get_lemma_names(relation, use_definitions=use_definitions)
176
177
178
def get_entailments(synset, use_definitions=False):
    """Fetch entailment lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.entailments
    return _get_lemma_names(relation, use_definitions=use_definitions)
188
189
190
def get_causes(synset, use_definitions=False):
    """Extract causes from a synset.

    :param (object): The synset instance.
    :param use_definitions (bool, optional):
        Extract definitions from the synset.
    :rtype list: The results list.
    """
    # The old code returned None (implicitly) when the synset had no
    # causes, unlike every sibling extractor which always returns a
    # list, and it also called synset.causes() a second time just for
    # the guard. _get_lemma_names already yields [] for an empty
    # relation, so the pre-check is unnecessary.
    return _get_lemma_names(synset.causes, use_definitions=use_definitions)
201
202
203
def get_also_sees(synset, use_definitions=False):
    """Pull also-see lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.also_sees
    return _get_lemma_names(relation, use_definitions=use_definitions)
213
214
215
def get_verb_groups(synset, use_definitions=False):
    """Pull verb-group lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.verb_groups
    return _get_lemma_names(relation, use_definitions=use_definitions)
225
226
227
def get_similartos(synset, use_definitions=False):
    """Pull similar-to lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.similar_tos
    return _get_lemma_names(relation, use_definitions=use_definitions)
237
238
239
def get_member_holoynms(synset, use_definitions=False):
    """Collect member-holonym lemma names for a synset.

    NOTE: the public name keeps its historical misspelling
    ("holoynms") so existing callers keep working; the underlying
    NLTK relation is ``member_holonyms``.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.member_holonyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
249
250
251
def get_part_holoynms(synset, use_definitions=False):
    """Collect part-holonym lemma names for a synset.

    NOTE: the public name keeps its historical misspelling
    ("holoynms") so existing callers keep working; the underlying
    NLTK relation is ``part_holonyms``.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.part_holonyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
261
262
263
def get_instance_hypernyms(synset, use_definitions=False):
    """Collect instance-hypernym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.instance_hypernyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
273
274
275
def get_hypernyms(synset, use_definitions=False):
    """Collect hypernym lemma names for a synset.

    :param synset (object): The synset instance.
    :param use_definitions (bool, optional): Also extract words from
        the related synsets' definitions.
    :rtype list: The results list.
    """
    relation = synset.hypernyms
    return _get_lemma_names(relation, use_definitions=use_definitions)
285
286
287
def get_verb_lemmas(verbs):
    """Look up verbnet lemmas for the given verbs.

    The verbs are stemmed before lookup to prevent empty results.

    :param verbs (list) - The list of verbs to reference.
    :rtype lemmas (list) - A flat list of lemmas for all verbs
                        - these are not separated by verb.
    """
    stemmed = normalization.stem_words(verbs)
    # classids returns ids like "run-51.3.2"; keep only the lemma part
    # before the first hyphen.
    return [
        class_id.split('-')[0]
        for verb in stemmed
        for class_id in verbnet.classids(lemma=verb)
    ]
301
302
303
def get_word_synsets(word):
    """Get all synsets for a word.

    :param word (str): The word to lookup.
    :rtype object: The synset ring instance.
    """
    # The old code passed word.encode('utf-8'); on Python 3 that hands
    # bytes to NLTK, which expects text and fails the lookup. Pass the
    # string through unchanged.
    return wordnet.synsets(word, pos=None)
310
311
312
def get_synset_definitions(word):
    """Return all possible definitions for synsets in a word synset ring.

    :param word (str): The word to lookup.
    :rtype definitions (list): One word-list per synset definition.
    """
    return [
        synset.definition().split()
        for synset in get_word_synsets(word)
    ]
323
324
325
def get_synsets_definitions(words):
    """Return all possible definitions for all synsets in the synset ring.

    Falsy entries (e.g. empty strings) are skipped.

    :param words (list): The list of words.
    :rtype sets (list): The synsets.
    """
    sets = []
    for word in words:
        if word:
            sets.append(get_synset_definitions(word))
    return sets
332
333
334
def get_synsets(words, use_definitions=False, clean=False):
    """Brute force loop on a synset ring to get all related words.

    You are expected to filter or remove any that are not relevant separately,
    if the resultant set is too long.
    The scoring module provides tools to filter based on pronunciation,
    but you can write your own and extend the functionality.

    :param words (list): The list of words.
    :param use_definitions (bool, optional): Determine if definition words
            should also be extracted.
    :param clean (bool, optional): Determine if set should be de-duped,
            cleaned, etc...
    :rtype results (dict): The results dictionary. With ``clean=False``
            each value is a dict of relation-name -> word list; with
            ``clean=True`` each value is a flat, cleaned, sorted list.
    """
    results = {}

    for word in words:
        synsets = get_word_synsets(word)

        key = {'synset_original': []}

        # NOTE(review): every relation key below is overwritten on each
        # pass of this loop, so only the *last* synset's relations
        # survive (while 'synset_original' accumulates) — confirm
        # whether accumulation was intended before changing it.
        for synset in synsets:
            if hasattr(synset.lemma_names, '__call__'):
                key['synset_original'].append(synset.lemma_names())
            else:
                key['synset_original'].append(synset.lemma_names)

            # More Specific *nyms (deep)
            key['hyponyms'] = get_hyponyms(
                synset, use_definitions=use_definitions)
            key['instance_hyponyms'] = get_inst_hyponyms(
                synset, use_definitions=use_definitions)
            key['member_meronyms'] = get_member_meronyms(
                synset, use_definitions=use_definitions)
            key['substance_meronyms'] = get_substance_meronyms(
                synset, use_definitions=use_definitions)
            key['part_meronyms'] = get_part_meronyms(
                synset, use_definitions=use_definitions)
            key['substance_holonyms'] = get_substance_holoynms(
                synset, use_definitions=use_definitions)

            # More Generic *nyms (shallow)
            key['member_holonyms'] = get_member_holoynms(
                synset, use_definitions=use_definitions)
            key['part_holonyms'] = get_part_holoynms(
                synset, use_definitions=use_definitions)
            key['instance_hypernyms'] = get_instance_hypernyms(
                synset, use_definitions=use_definitions)
            key['hypernyms'] = get_hypernyms(
                synset, use_definitions=use_definitions)

            # Other types
            key['topic_domains'] = get_topic_domains(
                synset, use_definitions=use_definitions)
            key['region_domains'] = get_region_domains(
                synset, use_definitions=use_definitions)
            key['usage_domains'] = get_usage_domains(
                synset, use_definitions=use_definitions)
            key['attributes'] = get_attributes(
                synset, use_definitions=use_definitions)
            key['entailments'] = get_entailments(
                synset, use_definitions=use_definitions)
            key['causes'] = get_causes(
                synset, use_definitions=use_definitions)
            key['also_sees'] = get_also_sees(
                synset, use_definitions=use_definitions)
            key['verb_groups'] = get_verb_groups(
                synset, use_definitions=use_definitions)
            key['similar_tos'] = get_similartos(
                synset, use_definitions=use_definitions)

        results[word] = key

    # 1. get words back
    # 2. flatten nested array
    # 3. split up words
    # 4. filter, clean, stem, uniquify
    if clean:  # hoisted: the flag is loop-invariant
        for word in results:
            # The old code did itertools.chain(*results[word]), which
            # unpacks the relation *dict* and chains its string keys
            # into a stream of characters — the collected words never
            # reached the cleaning pipeline. Flatten the relation word
            # lists instead (skipping empty/None relations), the same
            # way _get_synset_words does.
            relation_lists = [
                value for value in results[word].values() if value]
            flattened = list(normalization.flatten(relation_lists))
            results[word] = sorted(
                normalization.uniquify(
                    normalization.clean_sort(
                        normalization.remove_stop_words(
                            normalization.stem_words(
                                normalization.remove_bad_words(
                                    flattened))))))

    return results
425