Passed
Push — master ( 170db5...8af2aa )
by Shlomi
02:43 queued 58s
created

ethically.we.weat._filter_weat_data()   B

Complexity

Conditions 6

Size

Total Lines 14
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 10
nop 3
dl 0
loc 14
rs 8.6666
c 0
b 0
f 0
1
"""
2
Compute WEAT score of a Words Embedding.
3
4
WEAT is a bias measurement method for words embedding,
5
which is inspired by the `IAT <https://en.wikipedia.org/wiki/Implicit-association_test>`_
6
(Implicit Association Test) for humans.
7
It measures the similarity between two sets of *target words*
8
(e.g., programmer, engineer, scientist, ... and nurse, teacher, librarian, ...)
9
and two sets of *attribute words* (e.g., man, male, ... and woman, female ...).
10
A p-value is calculated using a permutation-test.
11
12
Reference:
13
    - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
14
      `Semantics derived automatically
15
      from language corpora contain human-like biases
16
      <http://opus.bath.ac.uk/55288/>`_.
17
      Science, 356(6334), 183-186.
18
19
.. important::
20
    The effect size and pvalue in the WEAT have
21
    entirely different meaning from those reported in IATs (original finding).
22
    Refer to the paper for more details.
23
24
Stimulus and original finding from:
25
26
- [0, 1, 2]:
27
  A. G. Greenwald, D. E. McGhee, J. L. Schwartz,
28
  Measuring individual differences in implicit cognition:
29
  the implicit association test.,
30
  Journal of personality and social psychology 74, 1464 (1998).
31
32
- [3, 4]:
33
  M. Bertrand, S. Mullainathan, Are Emily and Greg more employable
34
  than Lakisha and Jamal? a field experiment on labor market discrimination,
35
  The American Economic Review 94, 991 (2004).
36
37
- [5, 6, 9]:
38
  B. A. Nosek, M. Banaji, A. G. Greenwald, Harvesting implicit group attitudes
39
  and beliefs from a demonstration web site.,
40
  Group Dynamics: Theory, Research, and Practice 6, 101 (2002).
41
42
- [7]:
43
  B. A. Nosek, M. R. Banaji, A. G. Greenwald, Math=male, me=female,
44
  therefore math≠me.,
45
  Journal of Personality and Social Psychology 83, 44 (2002).
46
47
- [8]:
48
  P. D. Turney, P. Pantel, From frequency to meaning:
49
  Vector space models of semantics,
50
  Journal of Artificial Intelligence Research 37, 141 (2010).
51
"""
52
53
# pylint: disable=C0301
54
55
import copy
56
import random
57
import warnings
58
59
import numpy as np
60
import pandas as pd
61
from mlxtend.evaluate import permutation_test
62
63
from ..consts import RANDOM_STATE
64
from .data import WEAT_DATA
65
from .utils import assert_gensim_keyed_vectors
66
67
68
# Accepted values of the ``filter_by`` argument used when filtering WEAT data.
FILTER_BY_OPTIONS = ['model', 'data']

# Columns of the results table, in display order.
RESULTS_DF_COLUMNS = [
    'Target words',
    'Attrib. words',
    'Nt',
    'Na',
    's',
    'd',
    'p',
]

# Accepted values of the ``method`` argument of the p-value calculation.
PVALUE_METHODS = ['exact', 'approximate']

# Extra columns appended when the original findings are requested.
ORIGINAL_DF_COLUMNS = ['original_{}'.format(key) for key in ('N', 'd', 'p')]
73
74
75
def _calc_association_target_attributes(model, target_word,
                                        first_attribute_words,
                                        second_attribute_words):
    """
    Calc the association of one target word with two attribute sets.

    The association is the mean cosine similarity between the target word
    and each word of the first attribute set, minus the same mean for the
    second attribute set.

    The mean is taken over the per-word similarities. A single
    ``n_similarity`` call is not equivalent: it compares the target
    with the averaged attribute vector and yields one scalar.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param str target_word: The target word
    :param list first_attribute_words: Words of the first attribute set
    :param list second_attribute_words: Words of the second attribute set
    :return: Difference between the two mean similarities
             (``nan`` if an attribute set is empty, as ``np.mean`` of an
             empty sequence is ``nan``)
    """
    assert_gensim_keyed_vectors(model)

    with warnings.catch_warnings():
        # Silence FutureWarning raised while querying the model.
        warnings.simplefilter('ignore', FutureWarning)

        first_mean = np.mean([model.similarity(target_word, attribute_word)
                              for attribute_word in first_attribute_words])
        second_mean = np.mean([model.similarity(target_word, attribute_word)
                               for attribute_word in second_attribute_words])

    return first_mean - second_mean
88
89
90
def _calc_association_all_targets_attributes(model, target_words,
                                             first_attribute_words,
                                             second_attribute_words):
    """
    Calc the association with both attribute sets for every target word.

    :return: List with one association value per word in ``target_words``,
             in the same order
    """
    associations = []
    for word in target_words:
        association = _calc_association_target_attributes(
            model, word, first_attribute_words, second_attribute_words)
        associations.append(association)
    return associations
97
98
99
def _calc_weat_score(model,
                     first_target_words, second_target_words,
                     first_attribute_words, second_attribute_words):
    """
    Calc the WEAT score of two target sets against two attribute sets.

    :return: Sum of the associations of the first target set minus
             the sum of the associations of the second target set
    """
    associations = _calc_weat_associations(model,
                                           first_target_words,
                                           second_target_words,
                                           first_attribute_words,
                                           second_attribute_words)

    first_total, second_total = (sum(values) for values in associations)

    return first_total - second_total
111
112
113
def _calc_weat_pvalue(first_associations, second_associations,
                      method='approximate'):
    """
    Calc the p-value of the WEAT result with a permutation test.

    :param first_associations: Associations of the first target set
    :param second_associations: Associations of the second target set
    :param str method: One of ``PVALUE_METHODS``
    :return: The p-value returned by the permutation test
    :raises ValueError: If ``method`` is not one of ``PVALUE_METHODS``
    """
    is_known_method = method in PVALUE_METHODS
    if not is_known_method:
        message = 'method should be one of {}, {} was given'.format(
            PVALUE_METHODS, method)
        raise ValueError(message)

    # The seed has no meaning when the method is 'exact'.
    return permutation_test(first_associations, second_associations,
                            func='x_mean > y_mean',
                            method=method,
                            seed=RANDOM_STATE)
125
126
127
def _calc_weat_associations(model,
                            first_target_words, second_target_words,
                            first_attribute_words, second_attribute_words):
    """
    Calc the per-word associations of both target sets.

    Both target sets must have the same size, and so must both
    attribute sets.

    :return: Tuple of two lists - the associations of the first target
             words and the associations of the second target words
    """
    assert len(first_target_words) == len(second_target_words)
    assert len(first_attribute_words) == len(second_attribute_words)

    return tuple(
        _calc_association_all_targets_attributes(model,
                                                 words,
                                                 first_attribute_words,
                                                 second_attribute_words)
        for words in (first_target_words, second_target_words)
    )
145
146
147
def _filter_by_data_weat_stimuli(stimuli):
148
    """Inplace."""
149
    for group in stimuli:
150
        if 'remove' in stimuli[group]:
151
            words_to_remove = stimuli[group]['remove']
152
            stimuli[group]['words'] = [word for word in stimuli[group]['words']
153
                                       if word not in words_to_remove]
154
155
156
def _sample_if_bigger(seq, length):
157
    random.seed(RANDOM_STATE)
158
    if len(seq) > length:
159
        seq = random.sample(seq, length)
160
    return seq
161
162
163
def _filter_by_model_weat_stimuli(stimuli, model):
    """
    Keep, in place, only the stimuli words that are found in the model.

    For the target pair and for the attribute pair, both word lists are
    cut down to the size of the smaller one and stored sorted.
    """
    for category in ('target', 'attribute'):
        group_names = ('first_' + category, 'second_' + category)

        known_words = [
            [word for word in stimuli[name]['words'] if word in model]
            for name in group_names
        ]

        common_size = min(len(words) for words in known_words)

        balanced_words = [_sample_if_bigger(words, common_size)
                          for words in known_words]

        for name, words in zip(group_names, balanced_words):
            stimuli[name]['words'] = sorted(words)
185
186
187
def _filter_weat_data(weat_data, model, filter_by):
    """
    Filter, in place, the word lists of every WEAT case.

    :param weat_data: Iterable of WEAT cases (stimuli)
    :param model: Words embedding model, used when filtering by ``'model'``
    :param str filter_by: One of ``FILTER_BY_OPTIONS``
    :raises ValueError: If ``filter_by`` is not one of ``FILTER_BY_OPTIONS``
    """
    if filter_by not in FILTER_BY_OPTIONS:
        message = 'filter_by should be one of {}, {} was given'.format(
            FILTER_BY_OPTIONS, filter_by)
        raise ValueError(message)

    for stimuli in weat_data:
        if filter_by == 'data':
            _filter_by_data_weat_stimuli(stimuli)
        elif filter_by == 'model':
            _filter_by_model_weat_stimuli(stimuli, model)
201
202
203
def calc_single_weat(model,
                     first_target, second_target,
                     first_attribute, second_attribute,
                     with_pvalue=True, pvalue_kwargs=None):
    """
    Calc the WEAT result of a words embedding.

    If any of the four word lists is empty, the WEAT is not computed
    and the score, the effect size and the p-value are ``None``.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param dict first_target: First target words list and its name
    :param dict second_target: Second target words list and its name
    :param dict first_attribute: First attribute words list and its name
    :param dict second_attribute: Second attribute words list and its name
    :param bool with_pvalue: Whether to calculate the p-value of the
                             WEAT score (might be computationally expensive)
    :param dict pvalue_kwargs: Keyword arguments passed to the p-value
                               calculation (e.g., ``method``)
    :return: WEAT result as a dict with the keys ``'Target words'``,
             ``'Attrib. words'``, ``'s'`` (score), ``'d'`` (effect size),
             ``'p'`` (p-value), ``'Nt'`` and ``'Na'``
    """

    if pvalue_kwargs is None:
        pvalue_kwargs = {}

    score, effect_size, pvalue = None, None, None

    word_lists = [group['words']
                  for group in (first_target, second_target,
                                first_attribute, second_attribute)]

    # Check all four groups, not only the targets: empty attribute lists
    # must not reach the similarity computation.
    if all(len(words) > 0 for words in word_lists):
        (first_associations,
         second_associations) = _calc_weat_associations(model, *word_lists)

        score = sum(first_associations) - sum(second_associations)
        std_dev = np.std(first_associations + second_associations, ddof=0)
        effect_size = ((np.mean(first_associations)
                        - np.mean(second_associations))
                       / std_dev)

        if with_pvalue:
            pvalue = _calc_weat_pvalue(first_associations,
                                       second_associations,
                                       **pvalue_kwargs)

    return {'Target words': '{} vs. {}'.format(first_target['name'],
                                               second_target['name']),
            'Attrib. words': '{} vs. {}'.format(first_attribute['name'],
                                                second_attribute['name']),
            's': score,
            'd': effect_size,
            'p': pvalue,
            'Nt': '{}x2'.format(len(first_target['words'])),
            'Na': '{}x2'.format(len(first_attribute['words']))}
253
254
255
def calc_weat_pleasant_unpleasant_attribute(model,
                                            first_target, second_target,
                                            with_pvalue=True, pvalue_kwargs=None):
    """
    Calc the WEAT result of two target sets against fixed attribute sets.

    The attribute sets are taken from the first case of ``WEAT_DATA``
    (presumably pleasant vs. unpleasant words, as the function name
    suggests). All word lists are filtered by the model on copies, so
    the given target dicts and ``WEAT_DATA`` are left unmodified.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param dict first_target: First target words list and its name
    :param dict second_target: Second target words list and its name
    :param bool with_pvalue: Whether to calculate the p-value of the
                             WEAT score (might be computationally expensive)
    :param dict pvalue_kwargs: Keyword arguments passed to the p-value
                               calculation
    :return: WEAT result, as returned by :func:`calc_single_weat`
    """
    # The model filtering works in place, so the caller's target dicts are
    # copied just like the attribute sets.
    weat_data = {'first_attribute': copy.deepcopy(WEAT_DATA[0]['first_attribute']),
                 'second_attribute': copy.deepcopy(WEAT_DATA[0]['second_attribute']),
                 'first_target': copy.deepcopy(first_target),
                 'second_target': copy.deepcopy(second_target)}

    _filter_by_model_weat_stimuli(weat_data, model)

    if pvalue_kwargs is None:
        pvalue_kwargs = {}

    return calc_single_weat(model,
                            **weat_data,
                            with_pvalue=with_pvalue, pvalue_kwargs=pvalue_kwargs)
271
272
273
def calc_all_weat(model, weat_data='caliskan', filter_by='model',
                  with_original_finding=False,
                  with_pvalue=True, pvalue_kwargs=None):
    """
    Calc the WEAT results of a words embedding on multiple cases.

    Note that the effect size and p-value in the WEAT have
    entirely different meaning from those reported in IATs (original finding).
    Refer to the paper for more details.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param weat_data: WEAT cases data (a list of cases),
                      or ``'caliskan'`` for the built-in ``WEAT_DATA``
    :param str filter_by: Whether to filter the word lists
                          by the `model` (`'model'`)
                          or by the `remove` key in `weat_data` (`'data'`).
    :param bool with_original_finding: Whether to add the original
                                       finding of each case as
                                       ``original_*`` columns
    :param bool with_pvalue: Whether to calculate the p-value of the
                             WEAT results (might be computationally expensive)
    :param dict pvalue_kwargs: Keyword arguments passed to the p-value
                               calculation
    :return: :class:`pandas.DataFrame` of WEAT results
             (score, effect size, Nt, Na and p-value)
    :raises ValueError: If ``filter_by`` is not a valid option
    """

    if weat_data == 'caliskan':
        weat_data = WEAT_DATA

    if pvalue_kwargs is None:
        pvalue_kwargs = {}

    # The filtering works in place, so operate on a private copy.
    weat_data = copy.deepcopy(weat_data)

    _filter_weat_data(weat_data,
                      model,
                      filter_by)

    results = []
    for stimuli in weat_data:
        result = calc_single_weat(model,
                                  stimuli['first_target'],
                                  stimuli['second_target'],
                                  stimuli['first_attribute'],
                                  stimuli['second_attribute'],
                                  with_pvalue, pvalue_kwargs)

        # TODO: refactor - check before if one group is without words
        # because of the filtering
        if not all(group['words'] for group in stimuli.values()
                   if 'words' in group):
            # Use the keys of the result dict ('s', 'd', 'p'); other keys
            # would only add columns that are dropped below.
            result['s'] = None
            result['d'] = None
            result['p'] = None

        result['stimuli'] = stimuli

        if with_original_finding:
            result.update({'original_' + k: v
                           for k, v in stimuli['original_finding'].items()})
        results.append(result)

    results_df = pd.DataFrame(results)
    results_df = results_df.replace('nan', None)
    results_df = results_df.fillna('')

    # if not results_df.empty:
    cols = RESULTS_DF_COLUMNS[:]
    if with_original_finding:
        cols += ORIGINAL_DF_COLUMNS
    if not with_pvalue:
        cols.remove('p')
    else:
        # Falsy values (e.g. the '' placeholder of missing results)
        # are kept as they are.
        results_df['p'] = results_df['p'].apply(lambda pvalue: '{:0.1e}'.format(pvalue)  # pylint: disable=W0108
                                                if pvalue else pvalue)

    results_df = results_df[cols]
    results_df = results_df.round(2)

    return results_df
349