1
|
|
|
""" |
2
|
|
|
Compute WEAT score of a Words Embedding. |
3
|
|
|
|
4
|
|
|
WEAT is a bias measurement method for words embedding, |
5
|
|
|
which is inspired by the `IAT <https://en.wikipedia.org/wiki/Implicit-association_test>`_ |
6
|
|
|
(Implicit Association Test) for humans. |
7
|
|
|
It measures the similarity between two sets of *target words* |
8
|
|
|
(e.g., programmer, engineer, scientist, ... and nurse, teacher, librarian, ...) |
9
|
|
|
and two sets of *attribute words* (e.g., man, male, ... and woman, female ...). |
10
|
|
|
A p-value is calculated using a permutation-test. |
11
|
|
|
|
12
|
|
|
Reference: |
13
|
|
|
- Caliskan, A., Bryson, J. J., & Narayanan, A. (2017). |
14
|
|
|
`Semantics derived automatically |
15
|
|
|
from language corpora contain human-like biases |
16
|
|
|
<http://opus.bath.ac.uk/55288/>`_. |
17
|
|
|
Science, 356(6334), 183-186. |
18
|
|
|
|
19
|
|
|
.. important:: |
20
|
|
|
The effect size and pvalue in the WEAT have |
21
|
|
|
entirely different meaning from those reported in IATs (original finding). |
22
|
|
|
Refer to the paper for more details. |
23
|
|
|
|
24
|
|
|
Stimulus and original finding from: |
25
|
|
|
|
26
|
|
|
- [0, 1, 2] |
27
|
|
|
A. G. Greenwald, D. E. McGhee, J. L. Schwartz, |
28
|
|
|
Measuring individual differences in implicit cognition: |
29
|
|
|
the implicit association test., |
30
|
|
|
Journal of personality and social psychology 74, 1464 (1998). |
31
|
|
|
|
32
|
|
|
- [3, 4]: |
33
|
|
|
M. Bertrand, S. Mullainathan, Are Emily and Greg more employable |
34
|
|
|
than Lakisha and Jamal? a field experiment on labor market discrimination, |
35
|
|
|
The American Economic Review 94, 991 (2004). |
36
|
|
|
|
37
|
|
|
- [5, 6, 9]: |
38
|
|
|
B. A. Nosek, M. Banaji, A. G. Greenwald, Harvesting implicit group attitudes |
39
|
|
|
and beliefs from a demonstration web site., |
40
|
|
|
Group Dynamics: Theory, Research, and Practice 6, 101 (2002). |
41
|
|
|
|
42
|
|
|
- [7]: |
43
|
|
|
B. A. Nosek, M. R. Banaji, A. G. Greenwald, Math=male, me=female, |
44
|
|
|
therefore math≠me., |
45
|
|
|
Journal of Personality and Social Psychology 83, 44 (2002). |
46
|
|
|
|
47
|
|
|
- [8] |
48
|
|
|
P. D. Turney, P. Pantel, From frequency to meaning: |
49
|
|
|
Vector space models of semantics, |
50
|
|
|
Journal of Artificial Intelligence Research 37, 141 (2010). |
51
|
|
|
""" |
52
|
|
|
|
53
|
|
|
# pylint: disable=C0301 |
54
|
|
|
|
55
|
|
|
import copy |
56
|
|
|
import random |
57
|
|
|
import warnings |
58
|
|
|
|
59
|
|
|
import numpy as np |
60
|
|
|
import pandas as pd |
61
|
|
|
from mlxtend.evaluate import permutation_test |
62
|
|
|
|
63
|
|
|
from ..consts import RANDOM_STATE |
64
|
|
|
from .data import WEAT_DATA |
65
|
|
|
from .utils import assert_gensim_keyed_vectors |
66
|
|
|
|
67
|
|
|
|
68
|
|
|
FILTER_BY_OPTIONS = ['model', 'data'] |
69
|
|
|
RESULTS_DF_COLUMNS = ['Target words', 'Attrib. words', |
70
|
|
|
'Nt', 'Na', 's', 'd', 'p'] |
71
|
|
|
PVALUE_METHODS = ['exact', 'approximate'] |
72
|
|
|
ORIGINAL_DF_COLUMNS = ['original_' + key for key in ['N', 'd', 'p']] |
73
|
|
|
|
74
|
|
|
|
75
|
|
|
def _calc_association_target_attributes(model, target_word,
                                        first_attribute_words,
                                        second_attribute_words):
    """Return s(w, A, B) for a single target word.

    The association is the mean cosine similarity of *target_word* to the
    first attribute word set minus its mean similarity to the second one.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param str target_word: The target word w
    :param list first_attribute_words: The attribute word set A
    :param list second_attribute_words: The attribute word set B
    :return: The association value (float)
    """
    assert_gensim_keyed_vectors(model)

    # some gensim versions raise FutureWarning inside n_similarity
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', FutureWarning)
        mean_sim_first = model.n_similarity([target_word],
                                            first_attribute_words).mean()
        mean_sim_second = model.n_similarity([target_word],
                                             second_attribute_words).mean()

    return mean_sim_first - mean_sim_second
88
|
|
|
|
89
|
|
|
|
90
|
|
|
def _calc_association_all_targets_attributes(model, target_words,
                                             first_attribute_words,
                                             second_attribute_words):
    """Compute s(w, A, B) for every word w in *target_words*.

    :return: List of association values, one per target word
    """
    associations = []
    for word in target_words:
        association = _calc_association_target_attributes(
            model, word,
            first_attribute_words,
            second_attribute_words)
        associations.append(association)
    return associations
97
|
|
|
|
98
|
|
|
|
99
|
|
|
def _calc_weat_score(model,
                     first_target_words, second_target_words,
                     first_attribute_words, second_attribute_words):
    """Return the WEAT test statistic s(X, Y, A, B).

    The statistic is the sum of associations of the first target set
    minus the sum of associations of the second target set.
    """
    associations = _calc_weat_associations(model,
                                           first_target_words,
                                           second_target_words,
                                           first_attribute_words,
                                           second_attribute_words)
    first_associations, second_associations = associations

    return sum(first_associations) - sum(second_associations)
111
|
|
|
|
112
|
|
|
|
113
|
|
|
def _calc_weat_pvalue(first_associations, second_associations,
                      method='approximate'):
    """One-sided permutation-test p-value for the WEAT statistic.

    :param first_associations: Association values of the first target set
    :param second_associations: Association values of the second target set
    :param str method: Either ``'exact'`` or ``'approximate'``
    :raises ValueError: If *method* is not a supported option
    :return: The p-value (float)
    """
    if method not in PVALUE_METHODS:
        raise ValueError('method should be one of {}, {} was given'.format(
            PVALUE_METHODS, method))

    # the seed only matters for the 'approximate' (sampling) method;
    # for 'exact' it has no meaning
    return permutation_test(first_associations, second_associations,
                            func='x_mean > y_mean',
                            method=method,
                            seed=RANDOM_STATE)
125
|
|
|
|
126
|
|
|
|
127
|
|
|
def _calc_weat_associations(model,
                            first_target_words, second_target_words,
                            first_attribute_words, second_attribute_words):
    """Compute the association values for both target word sets.

    Both target sets (and both attribute sets) must have equal sizes.

    :return: Tuple of two lists of association values, one per target set
    """
    assert len(first_target_words) == len(second_target_words)
    assert len(first_attribute_words) == len(second_attribute_words)

    first_associations, second_associations = (
        _calc_association_all_targets_attributes(model,
                                                 target_words,
                                                 first_attribute_words,
                                                 second_attribute_words)
        for target_words in (first_target_words, second_target_words))

    return first_associations, second_associations
145
|
|
|
|
146
|
|
|
|
147
|
|
|
def _filter_by_data_weat_stimuli(stimuli): |
148
|
|
|
"""Inplace.""" |
149
|
|
|
for group in stimuli: |
150
|
|
|
if 'remove' in stimuli[group]: |
151
|
|
|
words_to_remove = stimuli[group]['remove'] |
152
|
|
|
stimuli[group]['words'] = [word for word in stimuli[group]['words'] |
153
|
|
|
if word not in words_to_remove] |
154
|
|
|
|
155
|
|
|
|
156
|
|
|
def _sample_if_bigger(seq, length):
    """Deterministically down-sample *seq* to *length* items if it is longer.

    Uses a dedicated ``random.Random(RANDOM_STATE)`` instance, which draws
    the exact same sample as seeding the global RNG (same algorithm, same
    seed) but without the hidden side effect of reseeding ``random``'s
    module-global state for all other callers.

    :param seq: A sequence of items
    :param int length: Maximal allowed length
    :return: *seq* itself if short enough, otherwise a sampled list
    """
    if len(seq) > length:
        seq = random.Random(RANDOM_STATE).sample(seq, length)
    return seq
161
|
|
|
|
162
|
|
|
|
163
|
|
|
def _filter_by_model_weat_stimuli(stimuli, model):
    """Restrict stimuli to the model vocabulary, inplace.

    For each of the target and attribute pairs: drop out-of-vocabulary
    words, deterministically down-sample the larger group to the size of
    the smaller one, and sort both word lists.
    """
    for category in ('target', 'attribute'):
        group_names = ('first_' + category, 'second_' + category)

        # keep only words that exist in the model vocabulary
        in_vocab = {name: [word
                           for word in stimuli[name]['words']
                           if word in model]
                    for name in group_names}

        # both groups must end up with the same number of words
        target_len = min(len(in_vocab[name]) for name in group_names)

        for name in group_names:
            words = _sample_if_bigger(in_vocab[name], target_len)
            words.sort()
            stimuli[name]['words'] = words
185
|
|
|
|
186
|
|
|
|
187
|
|
|
def _filter_weat_data(weat_data, model, filter_by):
    """Filter every stimuli case in *weat_data*, inplace.

    :param weat_data: Iterable of WEAT stimuli cases
    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param str filter_by: ``'data'`` to apply each case's ``'remove'``
                          lists, ``'model'`` to restrict each case to the
                          model vocabulary.
    :raises ValueError: If *filter_by* is not a supported option
    """
    if filter_by not in FILTER_BY_OPTIONS:
        raise ValueError('filter_by should be one of {}, {} was given'.format(
            FILTER_BY_OPTIONS, filter_by))

    for stimuli in weat_data:
        if filter_by == 'data':
            _filter_by_data_weat_stimuli(stimuli)
        else:  # filter_by == 'model'
            _filter_by_model_weat_stimuli(stimuli, model)
201
|
|
|
|
202
|
|
|
|
203
|
|
|
def calc_single_weat(model,
                     first_target, second_target,
                     first_attribute, second_attribute,
                     with_pvalue=True, pvalue_kwargs=None):
    """
    Calc the WEAT result of a words embedding.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param dict first_target: First target words list and its name
    :param dict second_target: Second target words list and its name
    :param dict first_attribute: First attribute words list and its name
    :param dict second_attribute: Second attribute words list and its name
    :param bool with_pvalue: Whether to calculate the p-value of the
                             WEAT score (might be computationally expensive)
    :param dict pvalue_kwargs: Keyword arguments forwarded to the
                               permutation test (e.g. ``method``)
    :return: WEAT result (score, size effect, Nt, Na and p-value)
    """
    pvalue_kwargs = {} if pvalue_kwargs is None else pvalue_kwargs

    (first_associations,
     second_associations) = _calc_weat_associations(model,
                                                    first_target['words'],
                                                    second_target['words'],
                                                    first_attribute['words'],
                                                    second_attribute['words'])

    # all None when one of the target groups has no words
    score = effect_size = pvalue = None

    if first_associations and second_associations:
        score = sum(first_associations) - sum(second_associations)

        # effect size d uses the population standard deviation (ddof=0)
        # over all the association values together
        pooled_std = np.std(first_associations + second_associations,
                            ddof=0)
        effect_size = ((np.mean(first_associations)
                        - np.mean(second_associations))
                       / pooled_std)

        if with_pvalue:
            pvalue = _calc_weat_pvalue(first_associations,
                                       second_associations,
                                       **pvalue_kwargs)

    targets_label = '{} vs. {}'.format(first_target['name'],
                                       second_target['name'])
    attributes_label = '{} vs. {}'.format(first_attribute['name'],
                                          second_attribute['name'])

    return {'Target words': targets_label,
            'Attrib. words': attributes_label,
            's': score,
            'd': effect_size,
            'p': pvalue,
            'Nt': '{}x2'.format(len(first_target['words'])),
            'Na': '{}x2'.format(len(first_attribute['words']))}
253
|
|
|
|
254
|
|
|
|
255
|
|
|
def calc_weat_pleasant_unpleasant_attribute(model,
                                            first_target, second_target,
                                            with_pvalue=True, pvalue_kwargs=None):
    """Run a WEAT with the given targets against pleasant vs. unpleasant.

    The attribute word sets are taken (deep-copied) from the first
    Caliskan et al. WEAT case; the stimuli are filtered by the model
    vocabulary before the test runs.
    """
    if pvalue_kwargs is None:
        pvalue_kwargs = {}

    stimuli = {'first_attribute': copy.deepcopy(WEAT_DATA[0]['first_attribute']),
               'second_attribute': copy.deepcopy(WEAT_DATA[0]['second_attribute']),
               'first_target': first_target,
               'second_target': second_target}

    _filter_by_model_weat_stimuli(stimuli, model)

    return calc_single_weat(model,
                            **stimuli,
                            with_pvalue=with_pvalue,
                            pvalue_kwargs=pvalue_kwargs)
271
|
|
|
|
272
|
|
|
|
273
|
|
|
def calc_all_weat(model, weat_data='caliskan', filter_by='model',
                  with_original_finding=False,
                  with_pvalue=True, pvalue_kwargs=None):
    """
    Calc the WEAT results of a words embedding on multiple cases.

    Note that the effect size and pvalue in the WEAT have
    entirely different meaning from those reported in IATs (original finding).
    Refer to the paper for more details.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param weat_data: WEAT cases data, or ``'caliskan'`` (default)
                      for the cases from Caliskan et al. (2017)
    :param str filter_by: Whether to filter the word lists
                          by the `model` (`'model'`)
                          or by the `remove` key in `weat_data` (`'data'`).
    :param bool with_original_finding: Show the original finding details
    :param bool with_pvalue: Whether to calculate the p-value of the
                             WEAT results (might be computationally expensive)
    :param dict pvalue_kwargs: Keyword arguments forwarded to the
                               permutation test (e.g. ``method``)
    :return: :class:`pandas.DataFrame` of WEAT results
             (score, size effect, Nt, Na and p-value)
    """

    if weat_data == 'caliskan':
        weat_data = WEAT_DATA

    if pvalue_kwargs is None:
        pvalue_kwargs = {}

    # deep copy so the filtering below does not mutate
    # the caller's data (or the module-level WEAT_DATA)
    weat_data = copy.deepcopy(weat_data)

    _filter_weat_data(weat_data,
                      model,
                      filter_by)

    results = []
    for stimuli in weat_data:
        result = calc_single_weat(model,
                                  stimuli['first_target'],
                                  stimuli['second_target'],
                                  stimuli['first_attribute'],
                                  stimuli['second_attribute'],
                                  with_pvalue, pvalue_kwargs)

        # TODO: refactor - check before if one group is without words
        #       because of the filtering
        if not all(group['words'] for group in stimuli.values()
                   if 'words' in group):
            # BUG FIX: mask the keys that are actually reported
            # ('s', 'd', 'p'); the previous keys 'score'/'effect_size'/
            # 'pvalue' never existed in the result and were silently
            # dropped by the column selection below.
            result['s'] = None
            result['d'] = None
            result['p'] = None

        result['stimuli'] = stimuli

        if with_original_finding:
            result.update({'original_' + k: v
                           for k, v in stimuli['original_finding'].items()})
        results.append(result)

    results_df = pd.DataFrame(results)
    # NOTE(review): in older pandas, replace(..., None) is interpreted as
    # method='pad' (forward fill) rather than substituting None - confirm
    # the intended semantics before changing this line.
    results_df = results_df.replace('nan', None)
    results_df = results_df.fillna('')

    cols = RESULTS_DF_COLUMNS[:]
    if with_original_finding:
        cols += ORIGINAL_DF_COLUMNS
    if not with_pvalue:
        cols.remove('p')
    else:
        # falsy pvalues (None / '') are left as-is
        results_df['p'] = results_df['p'].apply(lambda pvalue: '{:0.1e}'.format(pvalue)  # pylint: disable=W0108
                                                if pvalue else pvalue)

    results_df = results_df[cols]
    results_df = results_df.round(2)

    return results_df
349
|
|
|
|