BiasWordEmbedding._calc_bias_across_word_embeddings()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 30
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 15
nop 3
dl 0
loc 30
rs 9.65
c 0
b 0
f 0
1
# pylint: disable=too-many-lines
2
"""
3
Measuring and adjusting bias in word embedding by Bolukbasi (2016).
4
5
References:
6
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
7
      & Kalai, A. T. (2016).
8
      `Man is to computer programmer as woman is to homemaker?
9
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
10
      In Advances in neural information processing systems
11
      (pp. 4349-4357).
12
13
    - The code and data is based on the GitHub repository:
14
      https://github.com/tolga-b/debiaswe (MIT License).
15
16
    - Gonen, H., & Goldberg, Y. (2019).
17
      `Lipstick on a Pig:
18
      Debiasing Methods Cover up Systematic Gender Biases
19
      in Word Embeddings But do not Remove Them
20
      <https://arxiv.org/abs/1903.03862>`_.
21
      arXiv preprint arXiv:1903.03862.
22
23
    - Nissim, M., van Noord, R., van der Goot, R. (2019).
24
      `Fair is Better than Sensational: Man is to Doctor
25
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.
26
27
Usage
28
~~~~~
29
30
.. code:: python
31
32
   >>> from responsibly.we import GenderBiasWE
33
   >>> from gensim import downloader
34
   >>> w2v_model = downloader.load('word2vec-google-news-300')
35
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
36
   >>> w2v_gender_bias_we.calc_direct_bias()
37
   0.07307904249481942
38
   >>> w2v_gender_bias_we.debias()
39
   >>> w2v_gender_bias_we.calc_direct_bias()
40
   1.7964246601064155e-09
41
42
Types of Bias
43
~~~~~~~~~~~~~
44
45
Direct Bias
46
^^^^^^^^^^^
47
48
1. Associations
49
    Words that are closer to one end (e.g., *he*) than to
50
    the other end (*she*).
51
    For example, occupational stereotypes (page 7).
52
    Calculated by
53
    :meth:`~responsibly.we.bias.BiasWordEmbedding.calc_direct_bias`.
54
55
2. Analogies
56
    Analogies of *he:x::she:y*.
57
    For example analogies exhibiting stereotypes (page 7).
58
    Generated by
59
    :meth:`~responsibly.we.bias.BiasWordEmbedding.generate_analogies`.
60
61
62
Indirect Bias
63
^^^^^^^^^^^^^
64
65
The projection of a neutral word onto the direction between two other
neutral words is explained, to a large extent, by their shared projection
on the bias direction.
67
68
Calculated by
69
:meth:`~responsibly.we.bias.BiasWordEmbedding.calc_indirect_bias`
70
and
71
:meth:`~responsibly.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.
72
73
"""
74
75
import copy
76
import warnings
77
78
import matplotlib.pylab as plt
79
import numpy as np
80
import pandas as pd
81
import seaborn as sns
82
from scipy.stats import pearsonr, spearmanr
83
from sklearn.decomposition import PCA
84
from sklearn.metrics.pairwise import euclidean_distances
85
from sklearn.svm import LinearSVC
86
from tabulate import tabulate
87
from tqdm import tqdm
88
89
from responsibly.consts import RANDOM_STATE
90
from responsibly.utils import _warning_setup
91
from responsibly.we.benchmark import evaluate_word_embedding
92
from responsibly.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
93
from responsibly.we.utils import (
94
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
95
    generate_words_forms, get_seed_vector, most_similar, normalize,
96
    plot_clustering_as_classification, project_params, project_reject_vector,
97
    project_vector, reject_vector, round_to_extreme,
98
    take_two_sides_extreme_sorted, update_word_vector,
99
)
100
101
102
DIRECTION_METHODS = ['single', 'sum', 'pca']
103
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
104
FIRST_PC_THRESHOLD = 0.5
105
MAX_NON_SPECIFIC_EXAMPLES = 1000
106
107
__all__ = ['GenderBiasWE', 'BiasWordEmbedding']
108
109
_warning_setup()
110
111
112
class BiasWordEmbedding:
113
    """Measure and adjust a bias in English word embedding.
114
115
    :param model: Word embedding model of ``gensim.model.KeyedVectors``
116
    :param bool only_lower: Whether the word embedding contains
117
                            only lower case words
118
    :param bool verbose: Set verbosity
119
    :param bool to_normalize: Whether to normalize all the vectors
120
                              (recommended!)
121
    """
122
123
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        """Store the model and reset the bias direction state.

        :param model: Word embedding model of ``gensim.model.KeyedVectors``
        :param bool only_lower: Whether the word embedding contains
                                only lower case words
        :param bool verbose: Set verbosity
        :param identify_direction: Must be ``False`` for a direct instance
                                   of this class; the parameter exists so
                                   that sub-classes share this signature.
        :param bool to_normalize: Whether to normalize all the vectors
                                  by calling ``model.init_sims(replace=True)``
        :raises ValueError: If ``identify_direction`` is not ``False``
                            on a direct instance of this class.
        """
        # pylint: disable=undefined-variable

        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # identify_direction doesn't have any meaning
        # for the class BiasWordEmbedding
        # The goal is to force this interface on sub-classes.
        # The bare ``__class__`` refers to the class this method is defined
        # in, so the comparison is true only for non-sub-classed instances.
        is_direct_instance = self.__class__ == __class__
        if is_direct_instance and identify_direction is not False:
            message = ('identify_direction must be False'
                       ' for an instance of {}'
                       .format(__class__))
            raise ValueError(message)

        self.model = model

        # TODO: write unitest for when it is False
        self.only_lower = only_lower
        self._verbose = verbose

        # Filled in later by ``_identify_direction``.
        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if to_normalize:
            self.model.init_sims(replace=True)
152
153
    def __copy__(self):
        """Return a new instance that shares ``self.model``.

        The direction and both ends are deep-copied onto the new instance;
        the model object itself is passed through to the constructor as is.
        """
        clone = self.__class__(self.model,
                               self.only_lower,
                               self._verbose,
                               identify_direction=False)
        for attribute in ('direction', 'positive_end', 'negative_end'):
            setattr(clone, attribute,
                    copy.deepcopy(getattr(self, attribute)))
        return clone
162
163
    def __deepcopy__(self, memo):
        """Return a copy of this instance that owns a deep copy of the model.

        :param dict memo: The ``copy.deepcopy`` memo dictionary (not used).
        """
        duplicate = copy.copy(self)
        # ``copy.copy`` shares the model, so replace it with an own copy.
        duplicate.model = copy.deepcopy(duplicate.model)
        return duplicate
167
168
    def __getitem__(self, key):
169
        return self.model[key]
170
171
    def __contains__(self, item):
172
        return item in self.model
173
174
    def _filter_words_by_model(self, words):
175
        return [word for word in words if word in self]
176
177
    def _is_direction_identified(self):
178
        if self.direction is None:
179
            raise RuntimeError('The direction was not identified'
180
                               ' for this {} instance'
181
                               .format(self.__class__.__name__))
182
183
    # There is a mistake in the article
184
    # it is written (section 5.1):
185
    # "To identify the gender subspace, we took the ten gender pair difference
186
    # vectors and computed its principal components (PCs)"
187
    # however in the source code:
188
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
189
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        """Fit a PCA on the pair-centered vectors of the definitional pairs.

        For every pair, both word vectors are normalized and their midpoint
        is subtracted from each of them; the two resulting vectors are
        added as rows to the matrix the PCA is fitted on.

        :param definitional_pairs: Iterable of two-word pairs.
        :param int n_components: Number of components passed to ``PCA``.
        :return: The fitted ``PCA`` object.
        """
        centered_vectors = []

        for first_word, second_word in definitional_pairs:
            first_vector = normalize(self[first_word])
            second_vector = normalize(self[second_word])
            midpoint = (first_vector + second_vector) / 2
            centered_vectors.extend([first_vector - midpoint,
                                     second_vector - midpoint])

        pca = PCA(n_components=n_components)
        pca.fit(centered_vectors)

        if self._verbose:
            # One row per component: its 1-based rank and variance ratio.
            print(tabulate(enumerate(pca.explained_variance_ratio_, start=1),
                           headers=['Principal Component',
                                    'Explained Variance Ratio']))

        return pca
211
212
    # TODO: add the SVD method from section 6 step 1
213
    # It seems there is a mistake there, I think it is the same as PCA
214
    # just with replacing it with SVD
215
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        """Compute the bias direction and store it with its two ends.

        Sets ``self.direction``, ``self.positive_end`` and
        ``self.negative_end``.

        :param str positive_end: Word naming the positive side.
        :param str negative_end: Word naming the negative side.
        :param definitional: Input of the chosen method:
                             for ``'single'`` the two words at index 0 and 1,
                             for ``'sum'`` two groups of words at index 0
                             and 1, for ``'pca'`` the definitional pairs.
        :param str method: One of ``DIRECTION_METHODS``.
        :raises ValueError: If ``method`` is unknown or both ends are equal.
        :raises RuntimeError: For ``'pca'``, if the first principal component
                              explains less than ``FIRST_PC_THRESHOLD``
                              of the variance.
        """
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            # The literals are concatenated, so each piece needs its own
            # separating space.
            raise ValueError('positive_end and negative_end'
                             ' should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            if self._verbose:
                print('Positive definitional end:', definitional[0])
                print('Negative definitional end:', definitional[1])
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The Explained variance'
                                   ' of the first principal component should'
                                   ' be at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # if direction is opposite (e.g. we cannot control
            # what the PCA will return)
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end
269
270
    def project_on_direction(self, word):
        """Return the cosine similarity of a word vector and the direction.

        :param str word: The word to project
        :return float: The projection scalar
        :raises RuntimeError: If the direction was not identified.
        """
        self._is_direction_identified()

        word_vector = self[word]
        similarities = self.model.cosine_similarities(self.direction,
                                                      [word_vector])
        return similarities[0]
283
284
    def _calc_projection_scores(self, words):
        """Return a data frame of words and their projection scores.

        The frame has the columns ``word`` and ``projection`` and is sorted
        by ``projection`` in descending order; the row index still refers
        to the position of each word in ``words``.

        :param list words: The words to project.
        :raises RuntimeError: If the direction was not identified.
        """
        self._is_direction_identified()

        scores = pd.DataFrame({'word': words})

        # TODO: maybe using cosine_similarities on all the vectors?
        # it might be faster
        scores['projection'] = scores['word'].apply(self.project_on_direction)

        return scores.sort_values('projection', ascending=False)
295
296
    def calc_projection_data(self, words):
297
        """
298
        Calculate projection, projected and rejected vectors of a words list.
299
300
        :param list words: List of words
301
        :return: :class:`pandas.DataFrame` of the projection,
302
                 projected and rejected vectors of the words list
303
        """
304
        projection_data = []
305
        for word in words:
306
            vector = self[word]
307
            projection = self.project_on_direction(word)
308
            normalized_vector = normalize(vector)
309
310
            (projection,
311
             projected_vector,
312
             rejected_vector) = project_params(normalized_vector,
313
                                               self.direction)
314
315
            projection_data.append({'word': word,
316
                                    'vector': vector,
317
                                    'projection': projection,
318
                                    'projected_vector': projected_vector,
319
                                    'rejected_vector': rejected_vector})
320
321
        return pd.DataFrame(projection_data)
322
323
    def plot_projection_scores(self, words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.

        :param list words: The words to project
        :param int or None n_extreme: The number of extreme words to show;
                                      with ``None`` all words are shown.
        :param ax: Matplotlib axes to draw on; a new figure is created
                   when it is ``None``.
        :param axis_projection_step: Distance between the x-axis ticks,
                                     0.1 when ``None``.
        :type axis_projection_step: float or None
        :return: The ax object of the plot
        :raises RuntimeError: If the direction was not identified.
        """

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection'] = projections_df['projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        cmap = plt.get_cmap('RdBu')
        # Shift by 0.5 so that a zero projection maps to the colormap center.
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))

        most_extreme_projection = np.round(
            projections_df['projection']
            .abs()
            .max(),
            decimals=1)

        # Everything is drawn on ``ax`` explicitly, so a caller-supplied
        # axes is honored instead of whatever the current pyplot axes is.
        sns.barplot(x='projection', y='word', data=projections_df,
                    palette=projections_df['color'], ax=ax)

        ax.set_xticks(np.arange(-most_extreme_projection,
                                most_extreme_projection
                                + axis_projection_step,
                                axis_projection_step))
        ax.set_title('← {} {} {} →'.format(self.negative_end,
                                           ' ' * 20,
                                           self.positive_end))

        ax.set_xlabel('Direction Projection')
        ax.set_ylabel('Words')

        return ax
371
372
    def plot_dist_projections_on_direction(self, word_groups, ax=None):
        """Plot the projection scalars distribution on the direction.

        :param dict word_groups: The groups to project; group names as keys
                                 and lists of words as values.
        :param ax: Matplotlib axes to draw on; a new figure is created
                   when it is ``None``.
        :return: The ax object of the plot
        :raises RuntimeError: If the direction was not identified.
        """

        self._is_direction_identified()

        if ax is None:
            _, ax = plt.subplots(1)

        names = sorted(word_groups.keys())

        for name in names:
            words = word_groups[name]
            label = '{} (#{})'.format(name, len(words))
            vectors = [self[word] for word in words]
            projections = self.model.cosine_similarities(self.direction,
                                                         vectors)
            sns.distplot(projections, hist=False, label=label, ax=ax)

        # Decorate ``ax`` itself rather than the current pyplot axes, so a
        # caller-supplied axes receives the reference line and the labels.
        ax.axvline(0, color='k', linestyle='--')

        ax.set_title('← {} {} {} →'.format(self.negative_end,
                                           ' ' * 20,
                                           self.positive_end))
        ax.set_xlabel('Direction Projection')
        ax.set_ylabel('Density')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        return ax
402
403
    @classmethod
404
    def _calc_bias_across_word_embeddings(cls,
405
                                          word_embedding_bias_dict,
406
                                          words):
407
        """
408
        Calculate to projections and rho of words for two word embeddings.
409
410
        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
411
                                               as values,
412
                                               and their names as keys.
413
        :param list words: Words to be projected.
414
        :return tuple: Projections and spearman rho.
415
        """
416
        # pylint: disable=W0212
417
        assert len(word_embedding_bias_dict) == 2, 'Support only in two'\
418
                                                    'word embeddings'
419
420
        intersection_words = [word for word in words
421
                              if all(word in web
422
                                     for web in (word_embedding_bias_dict
423
                                                 .values()))]
424
425
        projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301
426
                       for name, web in word_embedding_bias_dict.items()}
427
428
        df = pd.DataFrame(projections)
429
        df.index = intersection_words
430
431
        rho, _ = spearmanr(*df.transpose().values)
432
        return df, rho
433
434
    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         words, ax=None, scatter_kwargs=None):
        """
        Plot the projections of same words of two word embeddings.

        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :param ax: Matplotlib axes to draw on; a new figure is created
                   when it is ``None``.
        :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
        :type scatter_kwargs: dict or None
        :return: The ax object of the plot
        """
        # pylint: disable=W0212

        df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words)

        if ax is None:
            _, ax = plt.subplots(1)

        if scatter_kwargs is None:
            scatter_kwargs = {}

        name1, name2 = word_embedding_bias_dict.keys()

        ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

        # Title, labels and limits are set on ``ax`` itself, so a
        # caller-supplied axes is decorated rather than the current one.
        ax.set_title('Bias Across Word Embeddings'
                     ' (Spearman Rho = {:0.2f})'.format(rho))

        # The ends of the first word embedding label both axes.
        negative_end = word_embedding_bias_dict[name1].negative_end
        positive_end = word_embedding_bias_dict[name1].positive_end
        ax.set_xlabel('← {}     {}     {} →'.format(negative_end,
                                                    name1,
                                                    positive_end))
        ax.set_ylabel('← {}     {}     {} →'.format(negative_end,
                                                    name2,
                                                    positive_end))

        # Same limits on both axes.
        ax_min = round_to_extreme(df.values.min())
        ax_max = round_to_extreme(df.values.max())
        ax.set_xlim(ax_min, ax_max)
        ax.set_ylim(ax_min, ax_max)

        return ax
481
482
    # TODO: refactor for speed and clarity
483
    def generate_analogies(self, n_analogies=100, seed='ends',
                           multiple=False,
                           delta=1., restrict_vocab=30000,
                           unrestricted=False):
        """
        Generate analogies based on a seed vector.

        x - y ~ seed vector.
        or a:x::b:y when a-b ~ seed vector.

        The seed vector can be defined by two word ends,
        or by the bias direction.

        ``delta`` is used for semantic coherence. The default value of 1
        corresponds to an angle <= pi/3.

        There is criticism regarding generating analogies
        when used with `unrestricted=False` and not ignoring analogies
        with `match` column equal to `False`.
        Tolga's technique of generating analogies, as implemented in this
        method, is limited inherently to analogies with x != y, which may
        force "fake" bias analogies.

        See:

        - Nissim, M., van Noord, R., van der Goot, R. (2019).
          `Fair is Better than Sensational: Man is to Doctor
          as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

        :param seed: The definition of the seed vector.
                     Either by a tuple of two word ends,
                     or by `'ends'` for the pre-defined ends
                     or by `'direction'` for the pre-defined direction vector.
        :param int n_analogies: Number of analogies to generate.
                                Fewer are returned when the candidate pairs
                                run out first.
        :param bool multiple: Whether to allow multiple appearances of a word
                              in the analogies.
        :param float delta: Threshold for semantic similarity.
                            The maximal distance between x and y.
        :param int restrict_vocab: The vocabulary size to use.
        :param bool unrestricted: Whether to validate the generated analogies
                                  with unrestricted `most_similar`.
        :return: Data Frame of analogies (x, y), their distances,
                 and their cosine similarity scores
        """
        # pylint: disable=C0301,R0914,E1136

        if not unrestricted:
            warnings.warn('Not Using unrestricted most_similar '
                          'may introduce fake biased analogies.')

        (seed_vector,
         positive_end,
         negative_end) = get_seed_vector(seed, self)

        restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

        normalized_vectors = (restrict_vocab_vectors
                              / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

        pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

        # `pairs_distances` must be not-equal to zero
        # otherwise, x-y will be the zero vector, and every cosine similarity
        # will be equal to zero.
        # This causes the **limitation** of this method, which enforces
        # different words for x and y.
        pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

        pairs_indices = np.array(np.nonzero(pairs_mask)).T
        x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
        y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

        x_minus_y_vectors = x_vectors - y_vectors
        normalized_x_minus_y_vectors = (x_minus_y_vectors
                                        / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

        cos_distances = normalized_x_minus_y_vectors @ seed_vector

        # Candidate pairs, best aligned with the seed vector first.
        sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

        analogies = []
        generated_words_x = set()
        generated_words_y = set()

        # A plain ``for`` ends quietly when the candidates are exhausted;
        # pulling them with ``next()`` would raise ``StopIteration``.
        for cos_distance_index in sorted_cos_distances_indices:
            if len(analogies) >= n_analogies:
                break

            pair_index = pairs_indices[cos_distance_index]
            word_x, word_y = [self.model.index2word[index]
                              for index in pair_index]

            if not multiple and (word_x in generated_words_x
                                 or word_y in generated_words_y):
                continue

            analogy = ({positive_end: word_x,
                        negative_end: word_y,
                        'score': cos_distances[cos_distance_index],
                        'distance': pairs_distances[tuple(pair_index)]})

            generated_words_x.add(word_x)
            generated_words_y.add(word_y)

            if unrestricted:
                most_x = next(word
                              for word, _ in most_similar(self.model,
                                                          [word_y, positive_end],
                                                          [negative_end]))
                most_y = next(word
                              for word, _ in most_similar(self.model,
                                                          [word_x, negative_end],
                                                          [positive_end]))

                analogy['most_x'] = most_x
                analogy['most_y'] = most_y
                analogy['match'] = ((word_x == most_x)
                                    and (word_y == most_y))

            analogies.append(analogy)

        columns = [positive_end, negative_end, 'distance', 'score']

        if unrestricted:
            columns.extend(['most_x', 'most_y', 'match'])

        if not analogies:
            # Nothing was generated: return an empty frame that still has
            # the expected columns instead of failing on column selection.
            return pd.DataFrame(columns=columns)

        df = pd.DataFrame(analogies)
        df = df[columns]

        return df
615
616
    def calc_direct_bias(self, neutral_words, c=None):
        """Calculate the direct bias.

        It is the sum of the absolute projections of the neutral words
        on the direction, each raised to the power of ``c``,
        divided by the number of neutral words.

        :param list neutral_words: List of neutral words
        :param c: Strictness of bias measuring, 1 when ``None``
        :type c: float or None
        :return: The direct bias
        """
        exponent = 1 if c is None else c

        scores = self._calc_projection_scores(neutral_words)
        bias_terms = np.abs(scores['projection']) ** exponent

        return bias_terms.sum() / len(neutral_words)
635
636
    def calc_indirect_bias(self, word1, word2):
        """Calculate the indirect bias between two words.

        Based on the amount of shared projection of the words on the direction.

        Also called PairBias.

        :param str word1: First word
        :param str word2: Second word
        :return: The indirect bias between the two words
        :raises RuntimeError: If the direction was not identified.
        """
        self._is_direction_identified()

        first_vector = normalize(self[word1])
        second_vector = normalize(self[word2])

        # The parts of both vectors that remain after rejecting the direction.
        first_rejected = reject_vector(first_vector, self.direction)
        second_rejected = reject_vector(second_vector, self.direction)

        similarity = first_vector @ second_vector
        rejected_similarity = cosine_similarity(first_rejected,
                                                second_rejected)

        # Relative drop of the similarity once the direction is removed.
        return (similarity - rejected_similarity) / similarity
663
664
    def generate_closest_words_indirect_bias(self,
665
                                             neutral_positive_end,
666
                                             neutral_negative_end,
667
                                             words=None, n_extreme=5):
668
        """
669
        Generate closest words to a neutral direction and their indirect bias.
670
671
        The direction of the neutral words is used to find
672
        the most extreme words.
673
        The indirect bias is calculated between the most extreme words
674
        and the closest end.
675
676
        :param str neutral_positive_end: A word that define the positive side
677
                                         of the neutral direction.
678
        :param str neutral_negative_end: A word that define the negative side
679
                                         of the neutral direction.
680
        :param list words: List of words to project on the neutral direction.
681
        :param int n_extreme: The number for the most extreme words
682
                              (positive and negative) to show.
683
        :return: Data Frame of the most extreme words
684
                 with their projection scores and indirect biases.
685
        """
686
687
        neutral_direction = normalize(self[neutral_positive_end]
688
                                      - self[neutral_negative_end])
689
690
        vectors = [normalize(self[word]) for word in words]
691
        df = (pd.DataFrame([{'word': word,
692
                             'projection': vector @ neutral_direction}
693
                            for word, vector in zip(words, vectors)])
694
              .sort_values('projection', ascending=False))
695
696
        df = take_two_sides_extreme_sorted(df, n_extreme,
697
                                           'end',
698
                                           neutral_positive_end,
699
                                           neutral_negative_end)
700
701
        df['indirect_bias'] = df.apply(lambda r:
702
                                       self.calc_indirect_bias(r['word'],
703
                                                               r['end']),
704
                                       axis=1)
705
706
        df = df.set_index(['end', 'word'])
707
        df = df[['projection', 'indirect_bias']]
708
709
        return df
710
711
    def _extract_neutral_words(self, specific_words):
712
        extended_specific_words = set()
713
714
        # because or specific_full data was trained on partial word embedding
715
        for word in specific_words:
716
            extended_specific_words.add(word)
717
            extended_specific_words.add(word.lower())
718
            extended_specific_words.add(word.upper())
719
            extended_specific_words.add(word.title())
720
721
        neutral_words = [word for word in self.model.vocab
722
                         if word not in extended_specific_words]
723
724
        return neutral_words
725
726
    def _neutralize(self, neutral_words):
        """Replace every neutral word vector by its rejection from the direction.

        The model is re-initialized with ``init_sims(replace=True)``
        after all the words were updated.

        :param neutral_words: Iterable of the words to neutralize.
        :raises RuntimeError: If the direction was not identified.
        """
        self._is_direction_identified()

        # Show a progress bar only in verbose mode.
        words_iter = (tqdm(neutral_words) if self._verbose
                      else iter(neutral_words))

        for word in words_iter:
            rejected = reject_vector(self[word], self.direction)
            update_word_vector(self.model, word, rejected)

        self.model.init_sims(replace=True)
740
741
    def _equalize(self, equality_sets):
        """Equalize the words of every equality set around the direction.

        For each set, the word vectors are normalized and averaged into
        a center. Every word of the set is then replaced in the model
        by the rejected part of the center plus a scaled unit vector
        along the word's own centered projection on ``self.direction``.
        In verbose mode, a table with per-word projection data is
        printed. Finally, ``self.model.init_sims(replace=True)``
        is called.

        :param equality_sets: Iterable of equality sets
                              (each one a sequence of words).
        """
        # pylint: disable=R0914

        self._is_direction_identified()

        # Bound unconditionally, so the verbose-only bookkeeping below
        # never relies on a name that exists only in verbose mode.
        words_data = []

        for equality_set_index, equality_set_words in enumerate(equality_sets):
            equality_set_vectors = [normalize(self[word])
                                    for word in equality_set_words]
            center = np.mean(equality_set_vectors, axis=0)
            (projected_center,
             rejected_center) = project_reject_vector(center,
                                                      self.direction)

            # Floating-point round-off can push the radicand marginally
            # below zero, which would turn the scaling into NaN.
            # np.maximum still propagates a genuine NaN.
            radicand = 1 - np.linalg.norm(rejected_center)**2
            scaling = np.sqrt(np.maximum(radicand, 0.0))

            for word, vector in zip(equality_set_words, equality_set_vectors):
                projected_vector = project_vector(vector, self.direction)

                projected_part = normalize(projected_vector - projected_center)

                # In the code it is different of Bolukbasi
                # It behaves the same only for equality_sets
                # with size of 2 (pairs) - not sure!
                # However, my code is the same as the article
                # equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # For pairs, projected_part_vector1 == -projected_part_vector2,
                # and this is the same as
                # projected_part_vector1 == self.direction
                equalized_vector = rejected_center + scaling * projected_part

                update_word_vector(self.model, word, equalized_vector)

                if self._verbose:
                    words_data.append({
                        'equality_set_index': equality_set_index,
                        'word': word,
                        'scaling': scaling,
                        'projected_scalar': vector @ self.direction,
                        'equalized_projected_scalar': (equalized_vector
                                                       @ self.direction),
                    })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for 1-dim bias space (direction):')
            words_data_df = (pd.DataFrame(words_data)
                             .set_index(['equality_set_index', 'word']))
            print(tabulate(words_data_df, headers='keys'))

        self.model.init_sims(replace=True)
794
795
    def _generate_pair_candidates(self, pairs):
        """Expand word pairs into the word-form pairs found in the model.

        The forms of both words of a pair are generated with
        ``generate_one_word_forms`` and zipped together; a resulting
        pair is kept only if both of its words are in ``self.model``.

        :param pairs: Iterable of two-word pairs.
        :return: Set of ``(candidate1, candidate2)`` tuples.
        """
        candidates = set()

        for first_word, second_word in pairs:
            form_pairs = zip(generate_one_word_forms(first_word),
                             generate_one_word_forms(second_word))

            for first_form, second_form in form_pairs:
                if first_form in self.model and second_form in self.model:
                    candidates.add((first_form, second_form))

        return candidates
802
803
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding.

        :param str method: The method of debiasing
                           (one of ``DEBIAS_METHODS``).
        :param list neutral_words: List of neutral words
                                   for the neutralize step
        :param list equality_sets: List of equality sets,
                                   for the equalize step.
                                   The sets represent the direction.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one
        :return: ``None`` if ``inplace`` is true, otherwise the
                 debiased deep copy of this object.
        :raises ValueError: If ``method`` is not one
                            of ``DEBIAS_METHODS``.

        .. warning::

          After calling `debias`,
          all the vectors of the word embedding
          will be normalized to unit length.

        """

        # pylint: disable=W0212

        # Validate before copying: a deep copy of a whole word embedding
        # is expensive and pointless if the call is going to be rejected.
        if method not in DEBIAS_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DEBIAS_METHODS, method))

        bias_word_embedding = self if inplace else copy.deepcopy(self)

        if method in ['hard', 'neutralize']:
            if self._verbose:
                print('Neutralize...')
            bias_word_embedding._neutralize(neutral_words)

        if method == 'hard':
            if self._verbose:
                print('Equalize...')

            assert all(len(equality_set) == 2
                       for equality_set in equality_sets), \
                   'Currently supporting only equality pairs.'

            # Expand the pairs to every word form present in the model
            equality_sets = self._generate_pair_candidates(equality_sets)

            bias_word_embedding._equalize(equality_sets)

        if inplace:
            return None

        return bias_word_embedding
855
856
    def evaluate_word_embedding(self,
                                kwargs_word_pairs=None,
                                kwargs_word_analogies=None):
        """
        Evaluate word pairs tasks and word analogies tasks.

        Delegates to the module-level ``evaluate_word_embedding``
        function, applied to ``self.model``.

        :param kwargs_word_pairs: Kwargs for
                                  evaluate_word_pairs
                                  method.
        :type kwargs_word_pairs: dict or None
        :param kwargs_word_analogies: Kwargs for
                                      evaluate_word_analogies
                                      method.
        :type kwargs_word_analogies: dict or None
        :return: Tuple of :class:`pandas.DataFrame`
                 for the evaluation results.
        """

        evaluation_results = evaluate_word_embedding(self.model,
                                                     kwargs_word_pairs,
                                                     kwargs_word_analogies)

        return evaluation_results
878
879
    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific words.

        Using Linear SVM.

        The training set holds every vocabulary word that is a seed
        specific word, plus the first ``max_non_specific_examples``
        other vocabulary words (in vocabulary iteration order).
        The classifier is then applied to the whole vocabulary.

        Note that NumPy's global random seed is set
        to ``RANDOM_STATE`` as a side effect.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training
                                              (``MAX_NON_SPECIFIC_EXAMPLES``
                                              if ``None``)
        :param debug: If true, the training data is returned as well
                      (``None`` is treated as ``False``)
        :return: List of learned specific words and the classifier object;
                 with ``debug``, also the training matrix ``X``
                 and the labels ``y``
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        # Hash-based membership test: the seed is checked once
        # per vocabulary word, so scanning a list would be quadratic.
        seed_lookup = frozenset(seed_specific_words)

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_lookup

            if not is_specific:
                non_specific_example_count += 1
                # Keep only the first non-specific examples
                if non_specific_example_count > max_non_specific_examples:
                    continue

            data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        # Scale every training vector to unit length
        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y
935
936
    def _plot_most_biased_one_cluster(self,
                                      most_biased_neutral_words, y_bias,
                                      random_state=1, ax=None):
        """Plot the clustering of the given words' vectors.

        The vectors are looked up directly in ``self.model`` and
        handed to ``plot_clustering_as_classification``.

        :param most_biased_neutral_words: Iterable of words to plot.
        :param y_bias: Labels of the words, in the same order.
        :param random_state: Forwarded to the plotting helper.
        :param ax: Forwarded to the plotting helper.
        :return: The result of ``plot_clustering_as_classification``.
        """
        vectors = []
        for biased_word in most_biased_neutral_words:
            vectors.append(self.model[biased_word])

        clustering_result = plot_clustering_as_classification(
            vectors,
            y_bias,
            random_state=random_state,
            ax=ax)

        return clustering_result
946
947
    def compute_factual_association(self, factual_properity):
948
        """Compute association of a factual property to the projection.
949
950
        Inspired by WEFAT (Word-Embedding Factual Association Test),
951
        but it is not the same:
952
        - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
953
        `Semantics derived automatically
954
        from language corpora contain human-like biases
955
        <http://opus.bath.ac.uk/55288/>`_.
956
        Science, 356(6334), 183-186.
957
958
        In a future version, the WEFAT will also be implemented.
959
960
        If a word doesn't exist in the word embedding,
961
        then it will be filtered out.
962
963
        For example, in :class:`responsibly.we.bias.GenderBiasWE`,
964
        the defuat factual property is the percentage of female
965
        in various occupations
966
        from the Labor Force Statistics of 2017 Population Survey,
967
        Taken from: https://arxiv.org/abs/1804.06876
968
969
        :param dict factual_properity: Dictionary of words
970
                                       and their factual values.
971
        :return: Pearson r, pvalue and the words with their
972
                 associated factual values
973
                 and their projection on the bias direction.
974
        """
975
976
        points = {word: (value, self.project_on_direction(word))
977
                  for word, value in factual_properity.items()
978
                  if word in self.model}
979
980
        x, y = zip(*points.values())
981
982
        return pearsonr(x, y), points
983
984
    def plot_factual_association(self, factual_properity, ax=None):
        """Plot association of a factual property to the projection.

        See: :meth:`BiasWordEmbedding.compute_factual_association`

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :param ax: Axes object to draw on.
                   If ``None``, a new figure is created.
        :return: The axes object of the plot.
        """

        result = self.compute_factual_association(factual_properity)

        (r, pvalue), points = result
        x, y = zip(*points.values())

        if ax is None:
            _, ax = plt.subplots(1)

        ax.scatter(x, y)

        # Title and labels are set on the given axes itself;
        # the pyplot-level functions act on the *current* axes,
        # which is not necessarily the one passed by the caller.
        ax.set_title('Association between Factual Property '
                     'and Projection on Direction '
                     '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                     .format(r, pvalue))

        ax.set_xlabel('Factual Property')
        ax.set_ylabel('Projection on Direction')

        return ax
1012
1013
    @staticmethod
    def plot_most_biased_clustering(biased, debiased,
                                    seed='ends', n_extreme=500,
                                    random_state=1):
        """Plot clustering as classification of biased neutral words.

        The neutral words of ``biased`` are ranked by the projection
        of their normalized vectors on the seed vector; the most
        negative and the most positive ones are then clustered
        in both word embeddings.

        :param biased: Biased word embedding of
                       :class:`~responsibly.we.bias.BiasWordEmbedding`.
        :param debiased: Debiased word embedding of
                         :class:`~responsibly.we.bias.BiasWordEmbedding`.
        :param seed: The definition of the seed vector.
                    Either by a tuple of two word ends,
                    or by `'ends'` for the pre-defined ends
                    or by `'direction'` for
                    the pre-defined direction vector.
        :param n_extreme: The number of extreme biased
                          neutral words to use.
        :param random_state: Forwarded to the clustering plot.
        :return: Tuple of list of ax objects of the plot,
                 and a dictionary with the most positive
                 and negative words.

        Based on:

        - Gonen, H., & Goldberg, Y. (2019).
          `Lipstick on a Pig:
          Debiasing Methods Cover up Systematic Gender Biases
          in Word Embeddings But do not Remove
          Them <https://arxiv.org/abs/1903.03862>`_.
          arXiv preprint arXiv:1903.03862.

        - https://github.com/gonenhila/gender_bias_lipstick
        """
        # pylint: disable=protected-access,too-many-locals,line-too-long

        assert biased.positive_end == debiased.positive_end, \
            'Postive ends should be the same.'
        assert biased.negative_end == debiased.negative_end, \
            'Negative ends should be the same.'

        seed_vector, _, _ = get_seed_vector(seed, biased)

        neutral_words = biased._data['neutral_words']

        # (projection, word) tuples, from most negative to most positive
        neutral_word_projections = sorted(
            (normalize(biased[word]) @ seed_vector, word)
            for word in neutral_words)

        _, most_negative_words = zip(*neutral_word_projections[:n_extreme])
        _, most_positive_words = zip(*neutral_word_projections[-n_extreme:])

        most_biased_neutral_words = most_negative_words + most_positive_words

        # Labels follow the number of words actually selected, so they
        # stay aligned with the words even when there are fewer than
        # n_extreme neutral words.
        y_bias = ([False] * len(most_negative_words)
                  + [True] * len(most_positive_words))

        _, axes = plt.subplots(1, 2, figsize=(20, 5))

        acc_biased = biased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                          y_bias,
                                                          random_state=random_state,
                                                          ax=axes[0])
        axes[0].set_title('Biased - Accuracy={}'.format(acc_biased))

        acc_debiased = debiased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                              y_bias,
                                                              random_state=random_state,
                                                              ax=axes[1])
        axes[1].set_title('Debiased - Accuracy={}'.format(acc_debiased))

        return axes, {biased.positive_end: most_positive_words,
                      biased.negative_end: most_negative_words}
1086
1087
1088
class GenderBiasWE(BiasWordEmbedding):
1089
    """Measure and adjust the Gender Bias in English Word Embedding.
1090
1091
    :param model: Word embedding model of ``gensim.model.KeyedVectors``
1092
    :param bool only_lower: Whether the word embedding contains
1093
                            only lower case words
1094
    :param bool verbose: Set verbosity
1095
    :param str identify_direction: Set the method of identifying
1096
                                   the gender direction:
1097
                                   `'single'`, `'sum'` or `'pca'`.
1098
    :param bool to_normalize: Whether to normalize all the vectors
1099
                              (recommended!)
1100
    """
1101
1102
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction='pca', to_normalize=True):
        """Initialize the gender data and identify the gender direction.

        :param model: Word embedding model, forwarded to the base class.
        :param bool only_lower: Forwarded to the base class.
        :param bool verbose: Forwarded to the base class.
        :param identify_direction: The method of identifying the
                                   direction: `'single'`, `'sum'`
                                   or `'pca'`. If falsy, the direction
                                   is not identified.
        :param bool to_normalize: Forwarded to the base class.
        """
        # Honor the caller's choice; it was previously hard-coded to True,
        # which silently ignored ``to_normalize=False``.
        super().__init__(model=model,
                         only_lower=only_lower,
                         verbose=verbose,
                         to_normalize=to_normalize)
        self._initialize_data()

        if identify_direction:
            definitional = None

            if identify_direction == 'single':
                definitional = ('she', 'he')
            elif identify_direction == 'sum':
                # Regroup the pairs into one group per position
                definitional = list(zip(*self._data['definitional_pairs']))
            elif identify_direction == 'pca':
                definitional = self._data['definitional_pairs']

            self._identify_direction('she', 'he',
                                     definitional,
                                     identify_direction)
1123
1124
    def _initialize_data(self):
        """Prepare ``self._data`` from the Bolukbasi gender data.

        Works on a deep copy of ``BOLUKBASI_DATA['gender']``:
        unless ``only_lower`` is set, the specific words are expanded
        with ``generate_words_forms``; every word group is filtered
        with ``_filter_words_by_model``; and a sorted ``neutral_words``
        group is added.
        """
        specific_key = 'specific_full_with_definitional_equalize'

        self._data = copy.deepcopy(BOLUKBASI_DATA['gender'])
        data = self._data

        if not self.only_lower:
            data[specific_key] = generate_words_forms(data[specific_key])

        for group_key in data['word_group_keys']:
            data[group_key] = self._filter_words_by_model(data[group_key])

        neutral_words = self._extract_neutral_words(data[specific_key])
        data['neutral_words'] = neutral_words
        neutral_words.sort()

        data['word_group_keys'].append('neutral_words')
1140
1141
    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scores, by default of the profession names.

        :param words: Words to plot, or `'professions'` for
                      ``self._data['profession_names']``.
        :param n_extreme: Forwarded to the base implementation.
        :param ax: Forwarded to the base implementation.
        :param axis_projection_step: Forwarded to the base implementation.
        :return: The result of the base implementation.
        """
        # Same guard as in calc_direct_bias: comparing an array-like
        # of words to a string with == may be evaluated elementwise.
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)
1148
1149
    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        """Plot the projection distributions of word groups.

        :param word_groups: Mapping of group name to words,
                            or `'bolukbasi'` for the groups listed
                            in ``self._data['word_group_keys']``.
        :param ax: Forwarded to the base implementation.
        :return: The result of the base implementation.
        """
        if word_groups == 'bolukbasi':
            word_groups = {}
            for group_key in self._data['word_group_keys']:
                word_groups[group_key] = self._data[group_key]

        return super().plot_dist_projections_on_direction(word_groups, ax)
1156
1157
    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         ax=None, scatter_kwargs=None):
        """Plot the bias across word embeddings for the neutral professions.

        The words are taken from
        ``BOLUKBASI_DATA['gender']['neutral_profession_names']``.

        :param word_embedding_bias_dict: Forwarded to the base
                                         implementation.
        :param ax: Forwarded to the base implementation.
        :param scatter_kwargs: Forwarded to the base implementation.
        :return: The result of the base implementation.
        """
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']

        # Zero-argument super() starts the lookup after *this* class.
        # super(cls, cls) starts after the runtime class instead, so on
        # a subclass it would resolve back to this method and recurse.
        return super().plot_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words,
                                                        ax,
                                                        scatter_kwargs)
1167
1168
    def calc_direct_bias(self, neutral_words='professions', c=None):
        """Calculate the direct bias, by default on neutral professions.

        :param neutral_words: Words to use, or `'professions'` for
                              ``self._data['neutral_profession_names']``.
        :param c: Forwarded to the base implementation.
        :return: The result of the base implementation.
        """
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            neutral_words = self._data['neutral_profession_names']

        # ``c`` is forwarded for custom neutral words as well;
        # it used to be dropped in that case.
        return super().calc_direct_bias(neutral_words, c)
1174
1175
    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        """Generate closest words indirect bias, by default on professions.

        :param neutral_positive_end: Forwarded to the base implementation.
        :param neutral_negative_end: Forwarded to the base implementation.
        :param words: Words to use, or `'professions'` for
                      ``self._data['profession_names']``.
        :param n_extreme: Forwarded to the base implementation.
        :return: The result of the base implementation.
        """
        # pylint: disable=C0301

        # Same guard as in calc_direct_bias: comparing an array-like
        # of words to a string with == may be evaluated elementwise.
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                            neutral_negative_end,
                                                            words,
                                                            n_extreme=n_extreme)
1188
1189
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding, with gender-specific defaults.

        When not given, ``neutral_words`` defaults to
        ``self._data['neutral_words']`` (for the `'hard'` and
        `'neutralize'` methods), and ``equality_sets`` defaults to the
        union of the equalize pairs and the definitional pairs
        (for the `'hard'` method).

        :return: The result of the base implementation.
        """
        # pylint: disable=line-too-long
        if neutral_words is None and method in ['hard', 'neutralize']:
            neutral_words = self._data['neutral_words']

        if equality_sets is None and method == 'hard':
            equality_sets = {tuple(pair)
                             for pairs_key in ('equalize_pairs',
                                               'definitional_pairs')
                             for pair in self._data[pairs_key]}

        return super().debias(method, neutral_words, equality_sets,
                              inplace)
1202
1203
    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        """Learn specific words, by default from the Bolukbasi seed.

        :param seed_specific_words: Seed specific words, or `'bolukbasi'`
                                    for ``self._data['specific_seed']``.
        :param max_non_specific_examples: Forwarded to the base
                                          implementation.
        :param debug: Forwarded to the base implementation.
        :return: The result of the base implementation.
        """
        use_default_seed = seed_specific_words == 'bolukbasi'
        if use_default_seed:
            seed_specific_words = self._data['specific_seed']

        learned = super().learn_full_specific_words(seed_specific_words,
                                                    max_non_specific_examples,
                                                    debug)
        return learned
1212
1213
    def compute_factual_association(self,
                                    factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
        """Compute the factual association.

        The factual property defaults
        to ``OCCUPATION_FEMALE_PRECENTAGE``.

        :return: The result of the base implementation.
        """
        association = super().compute_factual_association(factual_properity)
        return association
1216
1217
    def plot_factual_association(self,
                                 factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                                 ax=None):
        """Plot the factual association.

        The factual property defaults
        to ``OCCUPATION_FEMALE_PRECENTAGE``.

        :return: The result of the base implementation.
        """
        plotted = super().plot_factual_association(factual_properity, ax)
        return plotted
1221