responsibly.we.data - Code Metrics - Inspection of "Useful threshold api" - ResponsiblyAI/responsibly - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#31)

by Shlomi

created 2019-08-04 02:48 UTC

responsibly.we.data A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	73
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	38
dl	0
loc	73
rs	10
c	0
b	0
f	0
wmc	3

2 Functions

Rating	Name	Duplication	Size	Complexity
A	load_w2v_small()	0	15	2
A	load_json_resource()	0	3	1

# TODO: how to import files from a package
import json
import warnings

from gensim.models.keyedvectors import KeyedVectors
from pkg_resources import resource_filename, resource_string


def load_w2v_small():
    """Return the reduced Word2Vec model as a `KeyedVectors` object.

    The model is read from the binary word2vec file bundled with this
    package. It is based on the embedding pre-trained on the
    Google News corpus:
    https://code.google.com/archive/p/word2vec/
    """
    # pylint: disable=C0301

    # DeprecationWarnings raised while locating and loading the file are
    # ignored; the previous warning filters are restored on exit.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        vectors_path = resource_filename(
            __name__, 'GoogleNews-vectors-negative300-bolukbasi.bin')
        return KeyedVectors.load_word2vec_format(vectors_path, binary=True)


def load_json_resource(resource_name):
    """Load and parse a JSON resource bundled with this package.

    `resource_name` is the resource's file name without the `.json`
    extension. The raw bytes are decoded as UTF-8 and the parsed JSON
    value is returned.
    """
    raw_bytes = resource_string(__name__, resource_name + '.json')
    text = raw_bytes.decode('utf-8')
    return json.loads(text)


BOLUKBASI_DATA = load_json_resource('bolukbasi')

# Short alias for the gender section; removed again at the end of this block.
_GENDER = BOLUKBASI_DATA['gender']

# Transpose the `professions` entries and keep the first column
# (the result is a tuple).
_GENDER['profession_names'] = list(zip(*_GENDER['professions']))[0]

# Sort the loaded word list in place.
_GENDER['specific_full'].sort()

# TODO: in the code of the article, the last definitional pair
# is not in the specific full
# Sorted union of all words from the definitional pairs, the equalize
# pairs and the full specific list.
_GENDER['specific_full_with_definitional_equalize'] = sorted(
    set.union(*(set(pair) for pair in _GENDER['definitional_pairs']))
    | set.union(*(set(pair) for pair in _GENDER['equalize_pairs']))
    | set(_GENDER['specific_full'])
)

# Profession names that do not appear in the combined specific word list.
_GENDER['neutral_profession_names'] = sorted(
    set(_GENDER['profession_names'])
    - set(_GENDER['specific_full_with_definitional_equalize'])
)

# Keys of the gender section that hold word groups.
_GENDER['word_group_keys'] = [
    'profession_names',
    'neutral_profession_names',
    'specific_seed',
    'specific_full',
    'specific_full_with_definitional_equalize',
]

del _GENDER


WEAT_DATA = load_json_resource('weat')

# Reference cited for the data loaded below:
#   Zhao, J., Wang, T., Yatskar, M., Ordonez, V., & Chang, K. W. (2018).
#   Gender bias in coreference resolution: Evaluation and debiasing methods.
#   arXiv preprint arXiv:1804.06876.
#   https://arxiv.org/abs/1804.06876
# NOTE: the "precentage" spelling is kept as-is; it is both the public
# constant name and the name of the JSON resource being loaded.
OCCUPATION_FEMALE_PRECENTAGE = load_json_resource(
    resource_name='occupational_female_precentage',
)

1			# TODO how import files from a package
2			import json
3			import warnings
4
5			from gensim.models.keyedvectors import KeyedVectors
6			from pkg_resources import resource_filename, resource_string
7
8
9			def load_w2v_small():
10			"""Load reduced Word2Vec model as `KeyedVectors` object.
11
12			Based on the pre-trained embedding on the Google News corpus:
13			https://code.google.com/archive/p/word2vec/
14			"""
15			# pylint: disable=C0301
16
17			with warnings.catch_warnings():
18			warnings.simplefilter('ignore', DeprecationWarning)
19			model = KeyedVectors.load_word2vec_format(
20			resource_filename(__name__, 'GoogleNews-vectors-negative300-bolukbasi.bin'),
21			binary=True)
22
23			return model
24
25
26			def load_json_resource(resource_name):
27			return json.loads(
28			resource_string(__name__, resource_name + '.json').decode('utf-8')
29
30			)
31
32
33			BOLUKBASI_DATA = load_json_resource('bolukbasi')
34
35			BOLUKBASI_DATA['gender']['profession_names'] = list(
36			zip(*BOLUKBASI_DATA['gender']['professions']))[0]
37
38
39			BOLUKBASI_DATA['gender']['specific_full'].sort()
40
41			# TODO: in the code of the article, the last definitional pair
42			# is not in the specific full
43			BOLUKBASI_DATA['gender']['specific_full_with_definitional_equalize'] = list(
44			(set.union(
45			*map(set, BOLUKBASI_DATA['gender']['definitional_pairs']))
46			| set.union(
47			*map(set, BOLUKBASI_DATA['gender']['equalize_pairs']))
48			| set(BOLUKBASI_DATA['gender']['specific_full']))
49			)
50			BOLUKBASI_DATA['gender']['specific_full_with_definitional_equalize'].sort()
51
52			BOLUKBASI_DATA['gender']['neutral_profession_names'] = list(
53			set(BOLUKBASI_DATA['gender']['profession_names'])
54			- set(BOLUKBASI_DATA['gender']['specific_full_with_definitional_equalize'])
55			)
56			BOLUKBASI_DATA['gender']['neutral_profession_names'].sort()
57
58			BOLUKBASI_DATA['gender']['word_group_keys'] = ['profession_names',
59			'neutral_profession_names',
60			'specific_seed',
61			'specific_full',
62			'specific_full_with_definitional_equalize'] # pylint: disable=C0301
63
64
65			WEAT_DATA = load_json_resource('weat')
66
67			# Zhao, J., Wang, T., Yatskar, M., Ordonez, V., & Chang, K. W. (2018).
68			# Gender bias in coreference resolution: Evaluation and debiasing methods.
69			# arXiv preprint arXiv:1804.06876.
70			# https://arxiv.org/abs/1804.06876
71			OCCUPATION_FEMALE_PRECENTAGE = load_json_resource(
72			'occupational_female_precentage')
73

ResponsiblyAI / responsibly

Pull Request — master (#31)

responsibly.we.data A

Complexity

Size/Duplication

Importance

2 Functions

Duplication Side-by-Side

Filter issues like