Passed
Pull Request — dev (#30)
by Shlomi
02:34
created

responsibly.we.data.load_json_resource()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
# TODO: how to import files from a package
2
import json
3
import warnings
4
5
from gensim.models.keyedvectors import KeyedVectors
6
from pkg_resources import resource_filename, resource_string
7
8
9
def load_w2v_small():
    """Return the reduced Word2Vec embedding as a `KeyedVectors` object.

    The vectors are based on the embedding pre-trained on the
    Google News corpus:
    https://code.google.com/archive/p/word2vec/

    The binary file is located with `resource_filename` against
    `__name__`; any `DeprecationWarning` raised while locating and
    loading it is silenced.
    """
    vectors_filename = 'GoogleNews-vectors-negative300-bolukbasi.bin'

    with warnings.catch_warnings():
        # Both the path lookup and the load run with the filter active.
        warnings.simplefilter('ignore', DeprecationWarning)
        vectors_path = resource_filename(__name__, vectors_filename)
        return KeyedVectors.load_word2vec_format(vectors_path, binary=True)
24
25
26
def load_json_resource(resource_name):
    """Parse the JSON resource called `resource_name` and return the result.

    `resource_name` is given without the `.json` extension. The resource
    is read with `resource_string` against `__name__` and decoded as
    UTF-8 before being parsed.
    """
    filename = resource_name + '.json'
    raw_bytes = resource_string(__name__, filename)
    return json.loads(raw_bytes.decode('utf-8'))
31
32
33
# Word lists loaded from the `bolukbasi` JSON resource; the entries added
# below are derived from the loaded data at import time.
BOLUKBASI_DATA = load_json_resource('bolukbasi')

# First element of every `professions` entry, kept as a tuple.
BOLUKBASI_DATA['gender']['profession_names'] = tuple(
    zip(*BOLUKBASI_DATA['gender']['professions'])
)[0]

# Sorted in place so the loaded list object itself is reordered.
BOLUKBASI_DATA['gender']['specific_full'].sort()

# Sorted union of every word in the definitional pairs, every word in the
# equalize pairs, and the `specific_full` list.
# TODO: in the code of the article, the last definitional pair
# is not in the specific full
BOLUKBASI_DATA['gender']['specific_full_with_definitional_equalize'] = sorted(
    set.union(*(set(pair)
                for pair in BOLUKBASI_DATA['gender']['definitional_pairs']))
    | set.union(*(set(pair)
                  for pair in BOLUKBASI_DATA['gender']['equalize_pairs']))
    | set(BOLUKBASI_DATA['gender']['specific_full'])
)

# Profession names that do not appear in the combined list above, sorted.
BOLUKBASI_DATA['gender']['neutral_profession_names'] = sorted(
    set(BOLUKBASI_DATA['gender']['profession_names']).difference(
        BOLUKBASI_DATA['gender']['specific_full_with_definitional_equalize'])
)

# Keys of the `gender` entry listed as word groups.
BOLUKBASI_DATA['gender']['word_group_keys'] = [
    'profession_names',
    'neutral_profession_names',
    'specific_seed',
    'specific_full',
    'specific_full_with_definitional_equalize',
]


WEAT_DATA = load_json_resource('weat')

# Zhao, J., Wang, T., Yatskar, M., Ordonez, V., & Chang, K. W. (2018).
# Gender bias in coreference resolution: Evaluation and debiasing methods.
# arXiv preprint arXiv:1804.06876.
# https://arxiv.org/abs/1804.06876
OCCUPATION_FEMALE_PRECENTAGE = load_json_resource(
    'occupational_female_precentage'
)
73