# pylint: disable=too-many-lines
"""
Measuring and adjusting bias in word embeddings by Bolukbasi (2016).

References:
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
      & Kalai, A. T. (2016).
      `Man is to computer programmer as woman is to homemaker?
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
      In Advances in neural information processing systems
      (pp. 4349-4357).

    - The code and data are based on the GitHub repository:
      https://github.com/tolga-b/debiaswe (MIT License).

    - Gonen, H., & Goldberg, Y. (2019).
      `Lipstick on a Pig:
      Debiasing Methods Cover up Systematic Gender Biases
      in Word Embeddings But do not Remove Them
      <https://arxiv.org/abs/1903.03862>`_.
      arXiv preprint arXiv:1903.03862.

    - Nissim, M., van Noord, R., van der Goot, R. (2019).
      `Fair is Better than Sensational: Man is to Doctor
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

Usage
~~~~~

.. code:: python

   >>> from ethically.we import GenderBiasWE
   >>> from gensim import downloader
   >>> w2v_model = downloader.load('word2vec-google-news-300')
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
   >>> w2v_gender_bias_we.calc_direct_bias()
   0.07307904249481942
   >>> w2v_gender_bias_we.debias()
   >>> w2v_gender_bias_we.calc_direct_bias()
   1.7964246601064155e-09

Types of Bias
~~~~~~~~~~~~~

Direct Bias
^^^^^^^^^^^

1. Associations
    Words that are closer to one end (e.g., *he*) than to
    the other end (*she*).
    For example, occupational stereotypes (page 7).
    Calculated by
    :meth:`~ethically.we.bias.BiasWordEmbedding.calc_direct_bias`.

2. Analogies
    Analogies of *he:x::she:y*.
    For example, analogies exhibiting stereotypes (page 7).
    Generated by
    :meth:`~ethically.we.bias.BiasWordEmbedding.generate_analogies`.
    See the sketch below.

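For illustration, a minimal sketch of both measurements
(reusing ``w2v_gender_bias_we`` from the Usage section above;
outputs omitted):

.. code:: python

   >>> w2v_gender_bias_we.calc_direct_bias()  # doctest: +SKIP
   >>> w2v_gender_bias_we.generate_analogies(n_analogies=10)  # doctest: +SKIP
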
Indirect Bias
^^^^^^^^^^^^^

The projection of a neutral word on a direction defined by two
other neutral words is explained, in a great portion, by a shared
projection on the bias direction.

Calculated by
:meth:`~ethically.we.bias.BiasWordEmbedding.calc_indirect_bias`
and
:meth:`~ethically.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.

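A hedged sketch (reusing ``w2v_gender_bias_we`` from above; the word
pair follows the paper's *softball* example, output omitted):

.. code:: python

   >>> w2v_gender_bias_we.calc_indirect_bias('softball', 'receptionist')  # doctest: +SKIP
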
"""

import copy
import warnings

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.svm import LinearSVC
from tabulate import tabulate
from tqdm import tqdm

from ethically.consts import RANDOM_STATE
from ethically.utils import _warning_setup
from ethically.we.benchmark import evaluate_word_embedding
from ethically.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
from ethically.we.utils import (
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
    generate_words_forms, get_seed_vector, most_similar, normalize,
    plot_clustering_as_classification, project_params, project_reject_vector,
    project_vector, reject_vector, round_to_extreme,
    take_two_sides_extreme_sorted, update_word_vector,
)

DIRECTION_METHODS = ['single', 'sum', 'pca']
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
FIRST_PC_THRESHOLD = 0.5
MAX_NON_SPECIFIC_EXAMPLES = 1000

__all__ = ['GenderBiasWE', 'BiasWordEmbedding']

_warning_setup()


class BiasWordEmbedding:
    """Measure and adjust a bias in English word embedding.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # identify_direction doesn't have any meaning
        # for the class BiasWordEmbedding
        if self.__class__ == __class__ and identify_direction is not False:
            raise ValueError('identify_direction must be False'
                             ' for an instance of {}'
                             .format(__class__))

        self.model = model

        # TODO: write a unit test for when it is False
        self.only_lower = only_lower

        self._verbose = verbose

        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if to_normalize:
            self.model.init_sims(replace=True)

    def __copy__(self):
        bias_word_embedding = self.__class__(self.model,
                                             self.only_lower,
                                             self._verbose,
                                             identify_direction=False)
        bias_word_embedding.direction = copy.deepcopy(self.direction)
        bias_word_embedding.positive_end = copy.deepcopy(self.positive_end)
        bias_word_embedding.negative_end = copy.deepcopy(self.negative_end)
        return bias_word_embedding

    def __deepcopy__(self, memo):
        bias_word_embedding = copy.copy(self)
        bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
        return bias_word_embedding

    def __getitem__(self, key):
        return self.model[key]

    def __contains__(self, item):
        return item in self.model

    def _filter_words_by_model(self, words):
        return [word for word in words if word in self]

    def _is_direction_identified(self):
        if self.direction is None:
            raise RuntimeError('The direction was not identified'
                               ' for this {} instance'
                               .format(self.__class__.__name__))

    # There is a mistake in the article.
    # It is written (section 5.1):
    # "To identify the gender subspace, we took the ten gender pair difference
    # vectors and computed its principal components (PCs)"
    # However, in the source code it is done differently:
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        matrix = []

        for word1, word2 in definitional_pairs:
            vector1 = normalize(self[word1])
            vector2 = normalize(self[word2])

            center = (vector1 + vector2) / 2

            matrix.append(vector1 - center)
            matrix.append(vector2 - center)

        pca = PCA(n_components=n_components)
        pca.fit(matrix)

        if self._verbose:
            table = enumerate(pca.explained_variance_ratio_, start=1)
            headers = ['Principal Component',
                       'Explained Variance Ratio']
            print(tabulate(table, headers=headers))

        return pca

    # TODO: add the SVD method from section 6 step 1
    # It seems there is a mistake there - I think it is the same as PCA,
    # just with PCA replaced by SVD
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end'
                             ' should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The explained variance'
                                   ' of the first principal component should'
                                   ' be at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # flip the direction if it is opposite
            # (we cannot control the sign that the PCA returns)
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end

    def project_on_direction(self, word):
        """Project the normalized vector of the word on the direction.

        :param str word: The word to project
        :return float: The projection scalar
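
        A minimal sketch (assuming the ``w2v_gender_bias_we`` object
        from the module-level Usage section; output omitted):

        .. code:: python

           >>> w2v_gender_bias_we.project_on_direction('nurse')  # doctest: +SKIP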
        """

        self._is_direction_identified()

        vector = self[word]
        projection_score = self.model.cosine_similarities(self.direction,
                                                          [vector])[0]
        return projection_score

    def _calc_projection_scores(self, words):
        self._is_direction_identified()

        df = pd.DataFrame({'word': words})

        # TODO: maybe using cosine_similarities on all the vectors?
        # it might be faster
        df['projection'] = df['word'].apply(self.project_on_direction)
        df = df.sort_values('projection', ascending=False)

        return df

    def calc_projection_data(self, words):
        """
        Calculate projection, projected and rejected vectors of a words list.

        :param list words: List of words
        :return: :class:`pandas.DataFrame` of the projection,
                 projected and rejected vectors of the words list
        """
        projection_data = []
        for word in words:
            vector = self[word]
            normalized_vector = normalize(vector)

            (projection,
             projected_vector,
             rejected_vector) = project_params(normalized_vector,
                                               self.direction)

            projection_data.append({'word': word,
                                    'vector': vector,
                                    'projection': projection,
                                    'projected_vector': projected_vector,
                                    'rejected_vector': rejected_vector})

        return pd.DataFrame(projection_data)

    def plot_projection_scores(self, words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.

        :param list words: The words to project
        :param int or None n_extreme: The number of extreme words to show
        :return: The ax object of the plot
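
        A minimal sketch (assuming ``w2v_gender_bias_we`` from the
        module-level Usage section; the word list is illustrative):

        .. code:: python

           >>> words = ['nurse', 'teacher', 'engineer', 'lawyer']
           >>> w2v_gender_bias_we.plot_projection_scores(words, n_extreme=2)  # doctest: +SKIP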
        """

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection'] = projections_df['projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        cmap = plt.get_cmap('RdBu')
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))

        most_extreme_projection = (projections_df['projection']
                                   .abs()
                                   .max()
                                   .round(1))

        sns.barplot(x='projection', y='word', data=projections_df,
                    palette=projections_df['color'])

        plt.xticks(np.arange(-most_extreme_projection,
                             most_extreme_projection + axis_projection_step,
                             axis_projection_step))
        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))

        plt.xlabel('Direction Projection')
        plt.ylabel('Words')

        return ax

    def plot_dist_projections_on_direction(self, word_groups, ax=None):
        """Plot the projection scalars distribution on the direction.

        :param dict word_groups: The word groups to project
        :return: The ax object of the plot
        """

        if ax is None:
            _, ax = plt.subplots(1)

        names = sorted(word_groups.keys())

        for name in names:
            words = word_groups[name]
            label = '{} (#{})'.format(name, len(words))
            vectors = [self[word] for word in words]
            projections = self.model.cosine_similarities(self.direction,
                                                         vectors)
            sns.distplot(projections, hist=False, label=label, ax=ax)

        plt.axvline(0, color='k', linestyle='--')

        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))
        plt.xlabel('Direction Projection')
        plt.ylabel('Density')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        return ax

    @classmethod
    def _calc_bias_across_word_embeddings(cls,
                                          word_embedding_bias_dict,
                                          words):
        """
        Calculate the projections and Spearman rho of words
        for two word embeddings.

        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :return tuple: Projections and Spearman rho.
        """
        # pylint: disable=W0212
        assert len(word_embedding_bias_dict) == 2, \
            'Supports only two word embeddings'

        intersection_words = [word for word in words
                              if all(word in web
                                     for web in (word_embedding_bias_dict
                                                 .values()))]

        projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301
                       for name, web in word_embedding_bias_dict.items()}

        df = pd.DataFrame(projections)
        df.index = intersection_words

        rho, _ = spearmanr(*df.transpose().values)
        return df, rho

    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         words, ax=None, scatter_kwargs=None):
        """
        Plot the projections of the same words from two word embeddings.

        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
        :type scatter_kwargs: dict or None
        :return: The ax object of the plot
        """
        # pylint: disable=W0212

        df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words)

        if ax is None:
            _, ax = plt.subplots(1)

        if scatter_kwargs is None:
            scatter_kwargs = {}

        name1, name2 = word_embedding_bias_dict.keys()

        ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

        plt.title('Bias Across Word Embeddings '
                  '(Spearman Rho = {:0.2f})'.format(rho))

        negative_end = word_embedding_bias_dict[name1].negative_end
        positive_end = word_embedding_bias_dict[name1].positive_end
        plt.xlabel('← {}     {}     {} →'.format(negative_end,
                                                 name1,
                                                 positive_end))
        plt.ylabel('← {}     {}     {} →'.format(negative_end,
                                                 name2,
                                                 positive_end))

        ax_min = round_to_extreme(df.values.min())
        ax_max = round_to_extreme(df.values.max())
        plt.xlim(ax_min, ax_max)
        plt.ylim(ax_min, ax_max)

        return ax

    # TODO: refactor for speed and clarity
    def generate_analogies(self, n_analogies=100, seed='ends',
                           multiple=False,
                           delta=1., restrict_vocab=30000,
                           unrestricted=False):
        """
        Generate analogies based on a seed vector.

        x - y ~ seed vector.
        or a:x::b:y when a-b ~ seed vector.

        The seed vector can be defined by two word ends,
        or by the bias direction.

        ``delta`` is used for semantic coherence.
        The default value of 1 corresponds to an angle <= pi/3.

        There is criticism regarding generating analogies
        when used with `unrestricted=False` and not ignoring analogies
        with `match` column equal to `False`.
        Tolga's technique of generating analogies, as implemented in this
        method, is limited inherently to analogies with x != y, which may
        force "fake" bias analogies.

        See:

        - Nissim, M., van Noord, R., van der Goot, R. (2019).
          `Fair is Better than Sensational: Man is to Doctor
          as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

        :param seed: The definition of the seed vector.
                     Either by a tuple of two word ends,
                     or by `'ends'` for the pre-defined ends
                     or by `'direction'` for the pre-defined direction vector.
        :param int n_analogies: Number of analogies to generate.
        :param bool multiple: Whether to allow multiple appearances of a word
                              in the analogies.
        :param float delta: Threshold for semantic similarity.
                            The maximal distance between x and y.
        :param int restrict_vocab: The vocabulary size to use.
        :param bool unrestricted: Whether to validate the generated analogies
                                  with unrestricted `most_similar`.
        :return: Data Frame of analogies (x, y), their distances,
                 and their cosine similarity scores
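
        A hedged sketch of the two seed definitions (assuming
        ``w2v_gender_bias_we`` from the module-level Usage section;
        outputs omitted):

        .. code:: python

           >>> w2v_gender_bias_we.generate_analogies(n_analogies=10)  # doctest: +SKIP
           >>> w2v_gender_bias_we.generate_analogies(seed=('he', 'she'),
           ...                                       unrestricted=True)  # doctest: +SKIP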
        """
        # pylint: disable=C0301,R0914

        if not unrestricted:
            warnings.warn('Not using unrestricted most_similar '
                          'may introduce fake biased analogies.')

        (seed_vector,
         positive_end,
         negative_end) = get_seed_vector(seed, self)

        restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

        normalized_vectors = (restrict_vocab_vectors
                              / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

        pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

        # `pairs_distances` must be non-zero;
        # otherwise, x-y will be the zero vector, and every cosine similarity
        # will be equal to zero.
        # This causes the **limitation** of this method, which enforces
        # x and y to be different words.
        pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

        pairs_indices = np.array(np.nonzero(pairs_mask)).T
        x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
        y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

        x_minus_y_vectors = x_vectors - y_vectors
        normalized_x_minus_y_vectors = (x_minus_y_vectors
                                        / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

        cos_distances = normalized_x_minus_y_vectors @ seed_vector

        sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

        sorted_cos_distances_indices_iter = iter(sorted_cos_distances_indices)

        analogies = []
        generated_words_x = set()
        generated_words_y = set()

        while len(analogies) < n_analogies:
            cos_distance_index = next(sorted_cos_distances_indices_iter)
            pairs_index = pairs_indices[cos_distance_index]
            word_x, word_y = [self.model.index2word[index]
                              for index in pairs_index]

            if multiple or (not multiple
                            and (word_x not in generated_words_x
                                 and word_y not in generated_words_y)):

                analogy = ({positive_end: word_x,
                            negative_end: word_y,
                            'score': cos_distances[cos_distance_index],
                            'distance': pairs_distances[tuple(pairs_index)]})

                generated_words_x.add(word_x)
                generated_words_y.add(word_y)

                if unrestricted:
                    most_x = next(word
                                  for word, _ in most_similar(self.model,
                                                              [word_y, positive_end],
                                                              [negative_end]))
                    most_y = next(word
                                  for word, _ in most_similar(self.model,
                                                              [word_x, negative_end],
                                                              [positive_end]))

                    analogy['most_x'] = most_x
                    analogy['most_y'] = most_y
                    analogy['match'] = ((word_x == most_x)
                                        and (word_y == most_y))

                analogies.append(analogy)

        df = pd.DataFrame(analogies)

        columns = [positive_end, negative_end, 'distance', 'score']

        if unrestricted:
            columns.extend(['most_x', 'most_y', 'match'])

        df = df[columns]

        return df

    def calc_direct_bias(self, neutral_words, c=None):
        """Calculate the direct bias.

        Based on the projection of neutral words on the direction.

        :param list neutral_words: List of neutral words
        :param c: Strictness of bias measuring
        :type c: float or None
        :return: The direct bias
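
        For example (reusing ``w2v_gender_bias_we`` from the module-level
        Usage section, before debiasing):

        .. code:: python

           >>> w2v_gender_bias_we.calc_direct_bias()
           0.07307904249481942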
        """

        if c is None:
            c = 1

        projections = self._calc_projection_scores(neutral_words)['projection']
        direct_bias_terms = np.abs(projections) ** c
        direct_bias = direct_bias_terms.sum() / len(neutral_words)

        return direct_bias

    def calc_indirect_bias(self, word1, word2):
        """Calculate the indirect bias between two words.

        Based on the amount of shared projection of the words on the direction.

        Also called PairBias.

        :param str word1: First word
        :param str word2: Second word
        :return: The indirect bias between the two words
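
        A hedged sketch (assuming ``w2v_gender_bias_we`` from the
        module-level Usage section; the pair follows the paper's
        *softball* example, output omitted):

        .. code:: python

           >>> w2v_gender_bias_we.calc_indirect_bias('softball', 'receptionist')  # doctest: +SKIP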
        """

        self._is_direction_identified()

        vector1 = normalize(self[word1])
        vector2 = normalize(self[word2])

        perpendicular_vector1 = reject_vector(vector1, self.direction)
        perpendicular_vector2 = reject_vector(vector2, self.direction)

        inner_product = vector1 @ vector2
        perpendicular_similarity = cosine_similarity(perpendicular_vector1,
                                                     perpendicular_vector2)

        indirect_bias = ((inner_product - perpendicular_similarity)
                         / inner_product)
        return indirect_bias

    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words=None, n_extreme=5):
        """
        Generate closest words to a neutral direction and their indirect bias.

        The direction of the neutral words is used to find
        the most extreme words.
        The indirect bias is calculated between the most extreme words
        and the closest end.

        :param str neutral_positive_end: A word that defines the positive side
                                         of the neutral direction.
        :param str neutral_negative_end: A word that defines the negative side
                                         of the neutral direction.
        :param list words: List of words to project on the neutral direction.
        :param int n_extreme: The number of the most extreme words
                              (positive and negative) to show.
        :return: Data Frame of the most extreme words
                 with their projection scores and indirect biases.
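
        A hedged sketch (assuming ``w2v_gender_bias_we`` from the
        module-level Usage section; the *softball*-*football* neutral
        direction follows the paper, output omitted):

        .. code:: python

           >>> w2v_gender_bias_we.generate_closest_words_indirect_bias(
           ...     'softball', 'football')  # doctest: +SKIP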
        """
679
680
        neutral_direction = normalize(self[neutral_positive_end]
681
                                      - self[neutral_negative_end])
682
683
        vectors = [normalize(self[word]) for word in words]
684
        df = (pd.DataFrame([{'word': word,
685
                             'projection': vector @ neutral_direction}
686
                            for word, vector in zip(words, vectors)])
687
              .sort_values('projection', ascending=False))
688
689
        df = take_two_sides_extreme_sorted(df, n_extreme,
690
                                           'end',
691
                                           neutral_positive_end,
692
                                           neutral_negative_end)
693
694
        df['indirect_bias'] = df.apply(lambda r:
695
                                       self.calc_indirect_bias(r['word'],
696
                                                               r['end']),
697
                                       axis=1)
698
699
        df = df.set_index(['end', 'word'])
700
        df = df[['projection', 'indirect_bias']]
701
702
        return df
703
704
    def _extract_neutral_words(self, specific_words):
705
        extended_specific_words = set()
706
707
        # because or specific_full data was trained on partial word embedding
708
        for word in specific_words:
709
            extended_specific_words.add(word)
710
            extended_specific_words.add(word.lower())
711
            extended_specific_words.add(word.upper())
712
            extended_specific_words.add(word.title())
713
714
        neutral_words = [word for word in self.model.vocab
715
                         if word not in extended_specific_words]
716
717
        return neutral_words
718
719
    def _neutralize(self, neutral_words):
        self._is_direction_identified()

        if self._verbose:
            neutral_words_iter = tqdm(neutral_words)
        else:
            neutral_words_iter = iter(neutral_words)

        for word in neutral_words_iter:
            neutralized_vector = reject_vector(self[word],
                                               self.direction)
            update_word_vector(self.model, word, neutralized_vector)

        self.model.init_sims(replace=True)

    def _equalize(self, equality_sets):
        # pylint: disable=R0914

        self._is_direction_identified()

        # defined unconditionally, so it exists even when not verbose
        words_data = []

        for equality_set_index, equality_set_words in enumerate(equality_sets):
            equality_set_vectors = [normalize(self[word])
                                    for word in equality_set_words]
            center = np.mean(equality_set_vectors, axis=0)
            (projected_center,
             rejected_center) = project_reject_vector(center,
                                                      self.direction)
            scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)

            for word, vector in zip(equality_set_words, equality_set_vectors):
                projected_vector = project_vector(vector, self.direction)

                projected_part = normalize(projected_vector - projected_center)

                # In Bolukbasi's code it is done differently.
                # It behaves the same only for equality_sets
                # with size of 2 (pairs) - not sure!
                # However, my code is the same as the article:
                # equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # For pairs, projected_part_vector1 == -projected_part_vector2,
                # and this is the same as
                # projected_part_vector1 == self.direction
                equalized_vector = rejected_center + scaling * projected_part

                update_word_vector(self.model, word, equalized_vector)

                if self._verbose:
                    words_data.append({
                        'equality_set_index': equality_set_index,
                        'word': word,
                        'scaling': scaling,
                        'projected_scalar': vector @ self.direction,
                        'equalized_projected_scalar': (equalized_vector
                                                       @ self.direction),
                    })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for 1-dim bias space (direction)):')
            words_data_df = (pd.DataFrame(words_data)
                             .set_index(['equality_set_index', 'word']))
            print(tabulate(words_data_df, headers='keys'))

        self.model.init_sims(replace=True)

    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding.

        :param str method: The method of debiasing.
        :param list neutral_words: List of neutral words
                                   for the neutralize step
        :param list equality_sets: List of equality sets,
                                   for the equalize step.
                                   The sets represent the direction.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one

        .. warning::

          After calling `debias`,
          all the vectors of the word embedding
          will be normalized to unit length.

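        A minimal sketch (assuming ``w2v_gender_bias_we`` from the
        module-level Usage section; ``inplace=False`` keeps the original
        object intact):

        .. code:: python

           >>> w2v_debiased = w2v_gender_bias_we.debias(method='neutralize',
           ...                                          inplace=False)  # doctest: +SKIP
           >>> w2v_debiased.calc_direct_bias()  # doctest: +SKIP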
        """

        # pylint: disable=W0212
        if inplace:
            bias_word_embedding = self
        else:
            bias_word_embedding = copy.deepcopy(self)

        if method not in DEBIAS_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DEBIAS_METHODS, method))

        if method in ['hard', 'neutralize']:
            if self._verbose:
                print('Neutralize...')
            bias_word_embedding._neutralize(neutral_words)

        if method == 'hard':
            if self._verbose:
                print('Equalize...')
            bias_word_embedding._equalize(equality_sets)

        if inplace:
            return None
        else:
            return bias_word_embedding

    def evaluate_word_embedding(self,
                                kwargs_word_pairs=None,
                                kwargs_word_analogies=None):
        """
        Evaluate word pairs tasks and word analogies tasks.

        :param kwargs_word_pairs: Kwargs for
                                  evaluate_word_pairs
                                  method.
        :type kwargs_word_pairs: dict or None
        :param kwargs_word_analogies: Kwargs for
                                      evaluate_word_analogies
                                      method.
        :type kwargs_word_analogies: dict or None
        :return: Tuple of :class:`pandas.DataFrame`
                 for the evaluation results.
        """

        return evaluate_word_embedding(self.model,
                                       kwargs_word_pairs,
                                       kwargs_word_analogies)

    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific words.

        Uses a linear SVM.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training
        :return: List of learned specific words and the classifier object
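
        A hedged sketch (assuming ``w2v_gender_bias_we`` from the
        module-level Usage section, whose subclass defaults to the
        Bolukbasi seed words; output omitted):

        .. code:: python

           >>> specific_words, clf = w2v_gender_bias_we.learn_full_specific_words()  # doctest: +SKIP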
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_specific_words

            if not is_specific:
                non_specific_example_count += 1
                if non_specific_example_count <= max_non_specific_examples:
                    data.append((self[word], is_specific))
            else:
                data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y

    def _plot_most_biased_one_cluster(self,
                                      most_biased_neutral_words, y_bias,
                                      random_state=1, ax=None):
        most_biased_vectors = [self.model[word]
                               for word in most_biased_neutral_words]

        return plot_clustering_as_classification(most_biased_vectors,
                                                 y_bias,
                                                 random_state=random_state,
                                                 ax=ax)

    def compute_factual_association(self, factual_properity):
        """Compute association of a factual property to the projection.

        Inspired by WEFAT (Word-Embedding Factual Association Test),
        but it is not the same:

        - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
          `Semantics derived automatically
          from language corpora contain human-like biases
          <http://opus.bath.ac.uk/55288/>`_.
          Science, 356(6334), 183-186.

        In a future version, the WEFAT will also be implemented.

        If a word doesn't exist in the word embedding,
        then it will be filtered out.

        For example, in :class:`ethically.we.bias.GenderBiasWE`,
        the default factual property is the percentage of females
        in various occupations
        from the Labor Force Statistics of 2017 Population Survey.
        Taken from: https://arxiv.org/abs/1804.06876

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :return: Pearson r, pvalue and the words with their
                 associated factual values
                 and their projection on the bias direction.
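
        A hedged sketch (assuming ``w2v_gender_bias_we`` from the
        module-level Usage section, whose subclass defaults to the
        occupation data mentioned above; outputs omitted):

        .. code:: python

           >>> (r, pvalue), points = w2v_gender_bias_we.compute_factual_association()  # doctest: +SKIP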
        """

        points = {word: (value, self.project_on_direction(word))
                  for word, value in factual_properity.items()
                  if word in self.model}

        x, y = zip(*points.values())

        return pearsonr(x, y), points

    def plot_factual_association(self, factual_properity, ax=None):
        """Plot association of a factual property to the projection.

        See: :meth:`BiasWordEmbedding.compute_factual_association`

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        """

        result = self.compute_factual_association(factual_properity)

        (r, pvalue), points = result
        x, y = zip(*points.values())

        if ax is None:
            _, ax = plt.subplots(1)

        ax.scatter(x, y)

        plt.title('Association between Factual Property '
                  'and Projection on Direction '
                  '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                  .format(r, pvalue))

        plt.xlabel('Factual Property')
        plt.ylabel('Projection on Direction')

        return ax

    @staticmethod
    def plot_most_biased_clustering(biased, debiased,
                                    seed='ends', n_extreme=500,
                                    random_state=1):
        """Plot clustering as classification of biased neutral words.

        :param biased: Biased word embedding of
                       :class:`~ethically.we.bias.BiasWordEmbedding`.
        :param debiased: Debiased word embedding of
                         :class:`~ethically.we.bias.BiasWordEmbedding`.
        :param seed: The definition of the seed vector.
                    Either by a tuple of two word ends,
                    or by `'ends'` for the pre-defined ends
                    or by `'direction'` for
                    the pre-defined direction vector.
        :param int n_extreme: The number of extreme biased
                              neutral words to use.
        :return: Tuple of list of ax objects of the plot,
                 and a dictionary with the most positive
                 and negative words.

        Based on:

        - Gonen, H., & Goldberg, Y. (2019).
          `Lipstick on a Pig:
          Debiasing Methods Cover up Systematic Gender Biases
          in Word Embeddings But do not Remove
          Them <https://arxiv.org/abs/1903.03862>`_.
          arXiv preprint arXiv:1903.03862.

        - https://github.com/gonenhila/gender_bias_lipstick
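
        A minimal sketch (assuming ``w2v_model`` from the module-level
        Usage section; a biased and a debiased copy are built first):

        .. code:: python

           >>> biased = GenderBiasWE(w2v_model)  # doctest: +SKIP
           >>> debiased = biased.debias(inplace=False)  # doctest: +SKIP
           >>> axes, extreme_words = BiasWordEmbedding.plot_most_biased_clustering(
           ...     biased, debiased)  # doctest: +SKIP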
        """
        # pylint: disable=protected-access,too-many-locals,line-too-long

        assert biased.positive_end == debiased.positive_end, \
            'Positive ends should be the same.'
        assert biased.negative_end == debiased.negative_end, \
            'Negative ends should be the same.'

        seed_vector, _, _ = get_seed_vector(seed, biased)

        neutral_words = biased._data['neutral_words']
        neutral_word_vectors = (biased[word] for word in neutral_words)
        neutral_word_projections = [(normalize(vector) @ seed_vector, word)
                                    for word, vector
                                    in zip(neutral_words,
                                           neutral_word_vectors)]

        neutral_word_projections.sort()

        _, most_negative_words = zip(*neutral_word_projections[:n_extreme])
        _, most_positive_words = zip(*neutral_word_projections[-n_extreme:])

        most_biased_neutral_words = most_negative_words + most_positive_words

        y_bias = [False] * n_extreme + [True] * n_extreme

        _, axes = plt.subplots(1, 2, figsize=(20, 5))

        acc_biased = biased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                          y_bias,
                                                          random_state=random_state,
                                                          ax=axes[0])
        axes[0].set_title('Biased - Accuracy={}'.format(acc_biased))

        acc_debiased = debiased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                              y_bias,
                                                              random_state=random_state,
                                                              ax=axes[1])
        axes[1].set_title('Debiased - Accuracy={}'.format(acc_debiased))

        return axes, {biased.positive_end: most_positive_words,
                      biased.negative_end: most_negative_words}


class GenderBiasWE(BiasWordEmbedding):
    """Measure and adjust the Gender Bias in English Word Embedding.

    :param model: Word embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the word embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    :param bool to_normalize: Whether to normalize all the vectors
                              (recommended!)
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=True, to_normalize=True):
        super().__init__(model=model,
                         only_lower=only_lower,
                         verbose=verbose,
                         to_normalize=to_normalize)
        self._initialize_data()
        if identify_direction:
            self._identify_direction('she', 'he',
                                     self._data['definitional_pairs'],
                                     'pca')

    def _initialize_data(self):
        self._data = copy.deepcopy(BOLUKBASI_DATA['gender'])

        if not self.only_lower:
            self._data['specific_full_with_definitional_equalize'] = \
                generate_words_forms(self
                                     ._data['specific_full_with_definitional_equalize'])  # pylint: disable=C0301

        for key in self._data['word_group_keys']:
            self._data[key] = (self._filter_words_by_model(self
                                                           ._data[key]))

        self._data['neutral_words'] = self._extract_neutral_words(self
                                                                  ._data['specific_full_with_definitional_equalize'])  # pylint: disable=C0301
        self._data['neutral_words'].sort()
        self._data['word_group_keys'].append('neutral_words')

    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        if words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)

    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        if word_groups == 'bolukbasi':
            word_groups = {key: self._data[key]
                           for key in self._data['word_group_keys']}

        return super().plot_dist_projections_on_direction(word_groups, ax)

    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         ax=None, scatter_kwargs=None):
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']
        # TODO: is it correct for inheritance of class method?
        return super(cls, cls).plot_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                                words,
                                                                ax,
                                                                scatter_kwargs)

    def calc_direct_bias(self, neutral_words='professions', c=None):
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            return super().calc_direct_bias(
                self._data['neutral_profession_names'], c)
        else:
            return super().calc_direct_bias(neutral_words, c)

    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        # pylint: disable=C0301

        if words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                            neutral_negative_end,
                                                            words,
                                                            n_extreme=n_extreme)

    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        # pylint: disable=C0301
        if method in ['hard', 'neutralize']:
            if neutral_words is None:
                neutral_words = self._data['neutral_words']

        if method == 'hard' and equality_sets is None:
            equality_sets = self._data['definitional_pairs']

            if not self.only_lower:
                assert all(len(equality_set) == 2
                           for equality_set in equality_sets), \
                    'Currently supporting only equality pairs if only_lower is False'
                # TODO: refactor
                equality_sets = {(candidate1, candidate2)
                                 for word1, word2 in equality_sets
                                 for candidate1, candidate2 in zip(generate_one_word_forms(word1),
                                                                   generate_one_word_forms(word2))}

        return super().debias(method, neutral_words, equality_sets,
                              inplace)

    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        if seed_specific_words == 'bolukbasi':
            seed_specific_words = self._data['specific_seed']

        return super().learn_full_specific_words(seed_specific_words,
                                                 max_non_specific_examples,
                                                 debug)

    def compute_factual_association(self,
                                    factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
        return super().compute_factual_association(factual_properity)

    def plot_factual_association(self,
                                 factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                                 ax=None):
        return super().plot_factual_association(factual_properity, ax)