1
|
|
|
# TODO: how to import files from a package
2
|
|
|
import json |
3
|
|
|
|
4
|
|
|
from gensim.models.keyedvectors import KeyedVectors |
5
|
|
|
from pkg_resources import resource_filename, resource_string |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
def load_w2v_small():
    """Load the word2vec vectors file shipped next to this module.

    The file ``GoogleNews-vectors-negative300-bolukbasi.bin`` is located
    through ``resource_filename`` relative to this module's package and
    read in binary word2vec format.

    Returns:
        Whatever ``KeyedVectors.load_word2vec_format`` returns for that file.
    """
    vectors_path = resource_filename(
        __name__, 'GoogleNews-vectors-negative300-bolukbasi.bin'
    )
    return KeyedVectors.load_word2vec_format(vectors_path, binary=True)
13
|
|
|
|
14
|
|
|
|
15
|
|
|
def load_json_resource(resource_name):
    """Read and parse a JSON resource located relative to this module.

    Args:
        resource_name: Resource name without its extension; ``'.json'``
            is appended before the lookup.

    Returns:
        The Python object produced by ``json.loads`` from the resource's
        UTF-8 decoded content.
    """
    raw_content = resource_string(__name__, resource_name + '.json')
    json_text = raw_content.decode('utf-8')
    return json.loads(json_text)
20
|
|
|
|
21
|
|
|
|
22
|
|
|
# Load the 'bolukbasi' JSON resource, then derive additional word groups
# under its 'gender' entry. Statement order matters: later groups are built
# from the ones computed before them.
BOLUKBASI_DATA = load_json_resource('bolukbasi')

# Transpose the 'professions' records and keep the first column (a tuple).
BOLUKBASI_DATA['gender']['profession_names'] = list(
    zip(*BOLUKBASI_DATA['gender']['professions'])
)[0]

# Sort the loaded list in place so it is stored in sorted order.
BOLUKBASI_DATA['gender']['specific_full'].sort()

# TODO: in the code of the article, the last definitional pair
#       is not in the specific full
# Sorted union of every word in the definitional pairs with 'specific_full'.
BOLUKBASI_DATA['gender']['specific_full_with_definitional'] = sorted(
    set.union(
        *(set(pair) for pair in BOLUKBASI_DATA['gender']['definitional_pairs'])
    )
    | set(BOLUKBASI_DATA['gender']['specific_full'])
)

# Profession names that do not appear among the specific/definitional words.
BOLUKBASI_DATA['gender']['neutral_profession_names'] = sorted(
    set(BOLUKBASI_DATA['gender']['profession_names']).difference(
        BOLUKBASI_DATA['gender']['specific_full_with_definitional']
    )
)

# Keys of the word groups stored under 'gender'.
BOLUKBASI_DATA['gender']['word_group_keys'] = [
    'profession_names',
    'neutral_profession_names',
    'specific_seed',
    'specific_full',
    'specific_full_with_definitional',
]
50
|
|
|
|