buildWikidataProperties()   D
last analyzed

Complexity

Conditions 8

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
c 0
b 0
f 0
dl 0
loc 23
rs 4.7619
1
#!/usr/bin/env python3
2
3
import requests
4
import json
5
import pickle
6
import sys
7
from nltk.corpus import wordnet as wn
8
9
default_language = 'en'
10
11
def buildWikidataProperties():
    """
        Return the set of all Wikidata properties.

        The Wikidata ``wbgetentities`` API is queried in 60 batches of 50
        property IDs each (P1 to P3000). For every returned entity that is
        not flagged as missing, its label and its aliases in
        ``default_language`` are collected.

        Returns:
            set of str: the labels and aliases that were found.
    """
    properties = set()
    maxRange = 60
    batchSize = 50
    for i in range(maxRange):
        # Progress indicator: one line per batch.
        print("%d/%d" % (i + 1, maxRange))
        firstId = batchSize * i + 1
        propertiesIDs = '|'.join('P%d' % x for x in range(firstId, firstId + batchSize))
        # NOTE(review): 'sites' is presumably ignored by the API when 'ids'
        # is supplied -- kept unchanged, confirm before removing.
        request = requests.get('http://www.wikidata.org/w/api.php', params={'action':'wbgetentities', 'sites':'itwiki', 'ids':propertiesIDs, 'format':'json'})
        j = request.json()
        # A failed call may carry no 'success' key at all, so look it up
        # defensively instead of indexing (which raised KeyError).
        if not j.get('success'):
            continue
        for prop in j.get('entities', {}).values():
            if 'missing' in prop:
                continue
            # Aliases are optional, both globally and per language.
            for alias in prop.get('aliases', {}).get(default_language, []):
                properties.add(alias['value'])
            # A property may have no label in the requested language; skip
            # the label instead of aborting the whole crawl with KeyError.
            label = prop.get('labels', {}).get(default_language)
            if label is not None:
                properties.add(label['value'])
    return properties
34
35
def buildNouns():
    """
        Build the set of nouns known to NLTK's WordNet corpus.

        Each noun synset contributes the part of its name that precedes
        the first '.'.
    """
    nouns = set()
    for synset in wn.all_synsets('n'):
        word, _, _ = synset.name().partition('.')
        nouns.add(word)
    return nouns
40
41
def buildVerbs():
    """
        Build the set of verbs known to NLTK's WordNet corpus.

        Each verb synset contributes the part of its name that precedes
        the first ".".
    """
    synsetNames = (synset.name() for synset in wn.all_synsets("v"))
    return set(name.partition(".")[0] for name in synsetNames)
46
47
if __name__ == "__main__":
    # Command-line entry point: build the requested data set and pickle it
    # into the given storage file.
    # Maps each accepted option to the function that builds its data set.
    builders = {
        '-wiki': buildWikidataProperties,
        '-n': buildNouns,
        '-v': buildVerbs,
    }
    # An unknown option used to pickle an empty dict silently; it is now
    # reported as a usage error, like a wrong argument count.
    if len(sys.argv) != 3 or sys.argv[2] not in builders:
        sys.exit("Syntax: ./%s storage_file -<database description> (wiki : Wikidata properties, n : nouns, v : verbs)" % sys.argv[0]) # ex: ./extractors.py file.pkl -wiki
    data = builders[sys.argv[2]]()
    # The context manager closes the file even if pickling fails.
    with open(sys.argv[1], 'wb') as f:
        pickle.dump(data, f)
60