Passed
Pull Request — master (#31)
by Shlomi
02:57 queued 35s
created

responsibly.we.bias.GenderBiasWE.debias()   A

Complexity

Conditions 5

Size

Total Lines 13
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 10
nop 5
dl 0
loc 13
rs 9.3333
c 0
b 0
f 0
1
# pylint: disable=too-many-lines
2
"""
3
Measuring and adjusting bias in word embedding by Bolukbasi (2016).
4
5
References:
6
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
7
      & Kalai, A. T. (2016).
8
      `Man is to computer programmer as woman is to homemaker?
9
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
10
      In Advances in neural information processing systems
11
      (pp. 4349-4357).
12
13
    - The code and data is based on the GitHub repository:
14
      https://github.com/tolga-b/debiaswe (MIT License).
15
16
    - Gonen, H., & Goldberg, Y. (2019).
17
      `Lipstick on a Pig:
18
      Debiasing Methods Cover up Systematic Gender Biases
19
      in Word Embeddings But do not Remove Them
20
      <https://arxiv.org/abs/1903.03862>`_.
21
      arXiv preprint arXiv:1903.03862.
22
23
    - Nissim, M., van Noord, R., van der Goot, R. (2019).
24
      `Fair is Better than Sensational: Man is to Doctor
25
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.
26
27
Usage
28
~~~~~
29
30
.. code:: python
31
32
   >>> from responsibly.we import GenderBiasWE
33
   >>> from gensim import downloader
34
   >>> w2v_model = downloader.load('word2vec-google-news-300')
35
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
36
   >>> w2v_gender_bias_we.calc_direct_bias()
37
   0.07307904249481942
38
   >>> w2v_gender_bias_we.debias()
39
   >>> w2v_gender_bias_we.calc_direct_bias()
40
   1.7964246601064155e-09
41
42
Types of Bias
43
~~~~~~~~~~~~~
44
45
Direct Bias
46
^^^^^^^^^^^
47
48
1. Associations
49
    Words that are closer to one end (e.g., *he*) than to
50
    the other end (*she*).
51
    For example, occupational stereotypes (page 7).
52
    Calculated by
53
    :meth:`~responsibly.we.bias.BiasWordEmbedding.calc_direct_bias`.
54
55
2. Analogies
56
    Analogies of *he:x::she:y*.
57
    For example analogies exhibiting stereotypes (page 7).
58
    Generated by
59
    :meth:`~responsibly.we.bias.BiasWordEmbedding.generate_analogies`.
60
61
62
Indirect Bias
63
^^^^^^^^^^^^^
64
65
The projection of a neutral word onto the direction defined by two neutral words
66
is explained in a great portion by a shared bias direction projection.
67
68
Calculated by
69
:meth:`~responsibly.we.bias.BiasWordEmbedding.calc_indirect_bias`
70
and
71
:meth:`~responsibly.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.
72
73
"""
74
75
import copy
76
import warnings
77
78
import matplotlib.pylab as plt
79
import numpy as np
80
import pandas as pd
81
import seaborn as sns
82
from scipy.stats import pearsonr, spearmanr
83
from sklearn.decomposition import PCA
84
from sklearn.metrics.pairwise import euclidean_distances
85
from sklearn.svm import LinearSVC
86
from tqdm import tqdm
87
88
from responsibly.consts import RANDOM_STATE
89
from responsibly.utils import _warning_setup
90
from responsibly.we.benchmark import evaluate_word_embedding
91
from responsibly.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
92
from responsibly.we.utils import (
93
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
94
    generate_words_forms, get_seed_vector, most_similar, normalize,
95
    plot_clustering_as_classification, project_params, project_reject_vector,
96
    project_vector, reject_vector, round_to_extreme,
97
    take_two_sides_extreme_sorted, update_word_vector,
98
)
99
from tabulate import tabulate
100
101
102
# Supported strategies for identifying the bias direction;
# checked by ``BiasWordEmbedding._identify_direction``.
DIRECTION_METHODS = ['single', 'sum', 'pca']

# Names of the debiasing strategies.
# NOTE(review): not referenced in the visible part of the file - confirm usage.
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']

# Minimal explained variance ratio demanded from the first principal
# component when the direction is identified with the 'pca' method.
FIRST_PC_THRESHOLD = 0.5

# NOTE(review): not referenced in the visible part of the file;
# presumably a cap on the number of non-specific examples - confirm.
MAX_NON_SPECIFIC_EXAMPLES = 1000

__all__ = ['GenderBiasWE', 'BiasWordEmbedding']

_warning_setup()
110
111
112
class BiasWordEmbedding:
113
    """Measure and adjust a bias in English word embedding.
114
115
    :param model: Word embedding model of ``gensim.model.KeyedVectors``
116
    :param bool only_lower: Whether the word embedding contains
117
                            only lower case words
118
    :param bool verbose: Set verbosity
119
    :param bool to_normalize: Whether to normalize all the vectors
120
                              (recommended!)
121
    """
122
123
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        """Wrap a word embedding model for bias measuring and adjusting.

        :param model: Word embedding model, validated by
                      ``assert_gensim_keyed_vectors``
        :param bool only_lower: Whether the word embedding contains
                                only lower case words
        :param bool verbose: Set verbosity
        :param identify_direction: Must be ``False`` when this base class
                                   is instantiated directly; it exists only
                                   to keep one constructor interface
                                   across sub-classes
        :param bool to_normalize: Whether to normalize all the vectors
                                  with ``model.init_sims(replace=True)``
        :raises ValueError: If ``identify_direction`` is not ``False``
                            for a direct instance of this class
        """
        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # ``identify_direction`` has no meaning for this base class;
        # the goal is to force this interface on the sub-classes.
        # ``__class__`` is the implicit reference to the class
        # in which this method is defined.
        is_base_class = self.__class__ == __class__
        if is_base_class and identify_direction is not False:
            raise ValueError('identify_direction must be False'
                             ' for an instance of {}'
                             .format(__class__))

        self.model = model

        # TODO: write unitest for when it is False
        self.only_lower = only_lower

        self._verbose = verbose

        # Filled in once a direction is identified.
        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if to_normalize:
            self.model.init_sims(replace=True)
150
151
    def __copy__(self):
        """Return a new instance that shares the same model object.

        The direction and both ends are deep-copied onto the new instance;
        the model itself is passed as-is to the constructor.
        """
        cls = self.__class__
        duplicate = cls(self.model,
                        self.only_lower,
                        self._verbose,
                        identify_direction=False)

        for attribute in ('direction', 'positive_end', 'negative_end'):
            setattr(duplicate, attribute,
                    copy.deepcopy(getattr(self, attribute)))

        return duplicate
160
161
    def __deepcopy__(self, memo):
        """Return a copy of this instance that owns a deep copy of the model.

        :param dict memo: The memo dictionary supplied by ``copy.deepcopy``
        :return: The new instance
        """
        bias_word_embedding = copy.copy(self)

        # Register the copy before recursing, so that any reference
        # back to this instance resolves to the very same copy.
        memo[id(self)] = bias_word_embedding

        # Forward ``memo`` so objects shared with the rest of the structure
        # being deep-copied are copied only once.
        bias_word_embedding.model = copy.deepcopy(self.model, memo)
        return bias_word_embedding
165
166
    def __getitem__(self, key):
167
        return self.model[key]
168
169
    def __contains__(self, item):
170
        return item in self.model
171
172
    def _filter_words_by_model(self, words):
173
        return [word for word in words if word in self]
174
175
    def _is_direction_identified(self):
176
        if self.direction is None:
177
            raise RuntimeError('The direction was not identified'
178
                               ' for this {} instance'
179
                               .format(self.__class__.__name__))
180
181
    # There is a mismatch between the article and its source code.
    # The article states (section 5.1):
    # "To identify the gender subspace, we took the ten gender pair difference
    # vectors and computed its principal components (PCs)"
    # however, the source code does it differently (see the link below),
    # and the method below centers each pair and uses both centered vectors:
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
187
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        """Fit a PCA on the centered vectors of the definitional pairs.

        Both normalized vectors of every pair are centered around
        the pair's mean, and all the centered vectors are fed to the PCA.

        :param definitional_pairs: Iterable of word pairs
        :param int n_components: Number of components for the PCA
        :return: The fitted ``PCA`` object
        """
        centered_vectors = []

        for first_word, second_word in definitional_pairs:
            first_vector = normalize(self[first_word])
            second_vector = normalize(self[second_word])

            pair_center = (first_vector + second_vector) / 2

            centered_vectors.extend([first_vector - pair_center,
                                     second_vector - pair_center])

        pca = PCA(n_components=n_components)
        pca.fit(centered_vectors)

        if self._verbose:
            headers = ['Principal Component',
                       'Explained Variance Ratio']
            numbered_ratios = enumerate(pca.explained_variance_ratio_,
                                        start=1)
            print(tabulate(numbered_ratios, headers=headers))

        return pca
209
210
    # TODO: add the SVD method from section 6 step 1
211
    # It seems there is a mistake there, I think it is the same as PCA
212
    # just with replacing it with SVD
213
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        """Identify the bias direction and store it on the instance.

        Sets ``self.direction``, ``self.positive_end``
        and ``self.negative_end``.

        :param str positive_end: Word that labels the positive side
        :param str negative_end: Word that labels the negative side
        :param definitional: For ``'single'`` - a pair of words;
                             for ``'sum'`` - a pair of word groups;
                             for ``'pca'`` - an iterable of word pairs
        :param str method: One of ``DIRECTION_METHODS``
        :raises ValueError: If ``method`` is unknown or both ends are equal
        :raises RuntimeError: If, with the ``'pca'`` method, the first
                              principal component explains less than
                              ``FIRST_PC_THRESHOLD`` of the variance
        """
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end'
                             ' should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            if self._verbose:
                print('Positive definitional end:', definitional[0])
                print('Negative definitional end:', definitional[1])
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The Explained variance'
                                   ' of the first principal component should be'
                                   ' at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # The sign of a principal component is arbitrary, so flip it
            # when it points from the positive end to the negative end.
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end
267
268
    def project_on_direction(self, word):
269
        """Project the normalized vector of the word on the direction.
270
271
        :param str word: The word tor project
272
        :return float: The projection scalar
273
        """
274
275
        self._is_direction_identified()
276
277
        vector = self[word]
278
        projection_score = self.model.cosine_similarities(self.direction,
279
                                                          [vector])[0]
280
        return projection_score
281
282
    def _calc_projection_scores(self, words):
283
        self._is_direction_identified()
284
285
        df = pd.DataFrame({'word': words})
286
287
        # TODO: maybe using cosine_similarities on all the vectors?
288
        # it might be faster
289
        df['projection'] = df['word'].apply(self.project_on_direction)
290
        df = df.sort_values('projection', ascending=False)
291
292
        return df
293
294
    def calc_projection_data(self, words):
295
        """
296
        Calculate projection, projected and rejected vectors of a words list.
297
298
        :param list words: List of words
299
        :return: :class:`pandas.DataFrame` of the projection,
300
                 projected and rejected vectors of the words list
301
        """
302
        projection_data = []
303
        for word in words:
304
            vector = self[word]
305
            projection = self.project_on_direction(word)
306
            normalized_vector = normalize(vector)
307
308
            (projection,
309
             projected_vector,
310
             rejected_vector) = project_params(normalized_vector,
311
                                               self.direction)
312
313
            projection_data.append({'word': word,
314
                                    'vector': vector,
315
                                    'projection': projection,
316
                                    'projected_vector': projected_vector,
317
                                    'rejected_vector': rejected_vector})
318
319
        return pd.DataFrame(projection_data)
320
321
    def plot_projection_scores(self, words, n_extreme=10,
322
                               ax=None, axis_projection_step=None):
323
        """Plot the projection scalar of words on the direction.
324
325
        :param list words: The words tor project
326
        :param int or None n_extreme: The number of extreme words to show
327
        :return: The ax object of the plot
328
        """
329
330
        self._is_direction_identified()
331
332
        projections_df = self._calc_projection_scores(words)
333
        projections_df['projection'] = projections_df['projection'].round(2)
334
335
        if n_extreme is not None:
336
            projections_df = take_two_sides_extreme_sorted(projections_df,
337
                                                           n_extreme=n_extreme)
338
339
        if ax is None:
340
            _, ax = plt.subplots(1)
341
342
        if axis_projection_step is None:
343
            axis_projection_step = 0.1
344
345
        cmap = plt.get_cmap('RdBu')
346
        projections_df['color'] = ((projections_df['projection'] + 0.5)
347
                                   .apply(cmap))
348
349
        most_extream_projection = (projections_df['projection']
350
                                   .abs()
351
                                   .max()
352
                                   .round(1))
353
354
        sns.barplot(x='projection', y='word', data=projections_df,
355
                    palette=projections_df['color'])
356
357
        plt.xticks(np.arange(-most_extream_projection,
358
                             most_extream_projection + axis_projection_step,
359
                             axis_projection_step))
360
        plt.title('← {} {} {} →'.format(self.negative_end,
361
                                        ' ' * 20,
362
                                        self.positive_end))
363
364
        plt.xlabel('Direction Projection')
365
        plt.ylabel('Words')
366
367
        return ax
368
369
    def plot_dist_projections_on_direction(self, word_groups, ax=None):
370
        """Plot the projection scalars distribution on the direction.
371
372
        :param dict word_groups word: The groups to projects
373
        :return float: The ax object of the plot
374
        """
375
376
        if ax is None:
377
            _, ax = plt.subplots(1)
378
379
        names = sorted(word_groups.keys())
380
381
        for name in names:
382
            words = word_groups[name]
383
            label = '{} (#{})'.format(name, len(words))
384
            vectors = [self[word] for word in words]
385
            projections = self.model.cosine_similarities(self.direction,
386
                                                         vectors)
387
            sns.distplot(projections, hist=False, label=label, ax=ax)
388
389
        plt.axvline(0, color='k', linestyle='--')
390
391
        plt.title('← {} {} {} →'.format(self.negative_end,
392
                                        ' ' * 20,
393
                                        self.positive_end))
394
        plt.xlabel('Direction Projection')
395
        plt.ylabel('Density')
396
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
397
398
        return ax
399
400
    @classmethod
401
    def _calc_bias_across_word_embeddings(cls,
402
                                          word_embedding_bias_dict,
403
                                          words):
404
        """
405
        Calculate to projections and rho of words for two word embeddings.
406
407
        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
408
                                               as values,
409
                                               and their names as keys.
410
        :param list words: Words to be projected.
411
        :return tuple: Projections and spearman rho.
412
        """
413
        # pylint: disable=W0212
414
        assert len(word_embedding_bias_dict) == 2, 'Support only in two'\
415
                                                    'word embeddings'
416
417
        intersection_words = [word for word in words
418
                              if all(word in web
419
                                     for web in (word_embedding_bias_dict
420
                                                 .values()))]
421
422
        projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301
423
                       for name, web in word_embedding_bias_dict.items()}
424
425
        df = pd.DataFrame(projections)
426
        df.index = intersection_words
427
428
        rho, _ = spearmanr(*df.transpose().values)
429
        return df, rho
430
431
    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         words, ax=None, scatter_kwargs=None):
        """
        Plot the projections of same words of two word embeddings.

        :param dict word_embedding_bias_dict: ``BiasWordEmbedding`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :param ax: The axes to draw on; a new figure is created if ``None``
        :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
        :type scatter_kwargs: dict or None
        :return: The ax object of the plot
        """
        # pylint: disable=W0212

        df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words)

        if ax is None:
            _, ax = plt.subplots(1)

        if scatter_kwargs is None:
            scatter_kwargs = {}

        name1, name2 = word_embedding_bias_dict.keys()

        ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

        # Draw on ``ax`` explicitly, so a caller-supplied axes is honored
        # even when it is not the current pyplot axes.
        ax.set_title('Bias Across Word Embeddings'
                     ' (Spearman Rho = {:0.2f})'.format(rho))

        # The ends are taken from the first word embedding for both axes.
        negative_end = word_embedding_bias_dict[name1].negative_end
        positive_end = word_embedding_bias_dict[name1].positive_end
        ax.set_xlabel('← {}     {}     {} →'.format(negative_end,
                                                    name1,
                                                    positive_end))
        ax.set_ylabel('← {}     {}     {} →'.format(negative_end,
                                                    name2,
                                                    positive_end))

        # Same limits on both axes.
        ax_min = round_to_extreme(df.values.min())
        ax_max = round_to_extreme(df.values.max())
        ax.set_xlim(ax_min, ax_max)
        ax.set_ylim(ax_min, ax_max)

        return ax
478
479
    # TODO: refactor for speed and clarity
480
    def generate_analogies(self, n_analogies=100, seed='ends',
                           multiple=False,
                           delta=1., restrict_vocab=30000,
                           unrestricted=False):
        """
        Generate analogies based on a seed vector.

        x - y ~ seed vector.
        or a:x::b:y when a-b ~ seed vector.

        The seed vector can be defined by two word ends,
        or by the bias direction.

        ``delta`` is used for semantic coherence. The default value of 1
        corresponds to an angle <= pi/3.

        There is criticism regarding generating analogies
        when used with `unrestricted=False` and not ignoring analogies
        with `match` column equal to `False`.
        Tolga's technique of generating analogies, as implemented in this
        method, is limited inherently to analogies with x != y, which may
        force "fake" bias analogies.

        See:

        - Nissim, M., van Noord, R., van der Goot, R. (2019).
          `Fair is Better than Sensational: Man is to Doctor
          as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

        If fewer than ``n_analogies`` candidate pairs exist,
        all the available analogies are returned.

        :param seed: The definition of the seed vector.
                     Either by a tuple of two word ends,
                     or by `'ends'` for the pre-defined ends
                     or by `'direction'` for the pre-defined direction vector.
        :param int n_analogies: Number of analogies to generate.
        :param bool multiple: Whether to allow multiple appearances of a word
                              in the analogies.
        :param float delta: Threshold for semantic similarity.
                            The maximal distance between x and y.
        :param int restrict_vocab: The vocabulary size to use.
        :param bool unrestricted: Whether to validate the generated analogies
                                  with unrestricted `most_similar`.
        :return: Data Frame of analogies (x, y), their distances,
                 and their cosine similarity scores
        """
        # pylint: disable=C0301,R0914

        if not unrestricted:
            warnings.warn('Not Using unrestricted most_similar '
                          'may introduce fake biased analogies.')

        (seed_vector,
         positive_end,
         negative_end) = get_seed_vector(seed, self)

        restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

        normalized_vectors = (restrict_vocab_vectors
                              / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

        pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

        # `pairs_distances` must be not-equal to zero
        # otherwise, x-y will be the zero vector, and every cosine similarity
        # will be equal to zero.
        # This causes the **limitation** of this method, which enforces
        # different words for x and y.
        pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

        pairs_indices = np.array(np.nonzero(pairs_mask)).T
        x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
        y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

        x_minus_y_vectors = x_vectors - y_vectors
        normalized_x_minus_y_vectors = (x_minus_y_vectors
                                        / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

        cos_distances = normalized_x_minus_y_vectors @ seed_vector

        # Candidate pairs, from the most to the least aligned with the seed.
        sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

        analogies = []
        generated_words_x = set()
        generated_words_y = set()

        # A ``for`` loop ends quietly when the candidates run out,
        # instead of leaking a ``StopIteration`` out of ``next``.
        for cos_distance_index in sorted_cos_distances_indices:
            if len(analogies) >= n_analogies:
                break

            pair_index = pairs_indices[cos_distance_index]
            word_x, word_y = [self.model.index2word[index]
                              for index in pair_index]

            if not multiple and (word_x in generated_words_x
                                 or word_y in generated_words_y):
                continue

            analogy = ({positive_end: word_x,
                        negative_end: word_y,
                        'score': cos_distances[cos_distance_index],
                        'distance': pairs_distances[tuple(pair_index)]})

            generated_words_x.add(word_x)
            generated_words_y.add(word_y)

            if unrestricted:
                most_x = next(word
                              for word, _ in most_similar(self.model,
                                                          [word_y, positive_end],
                                                          [negative_end]))
                most_y = next(word
                              for word, _ in most_similar(self.model,
                                                          [word_x, negative_end],
                                                          [positive_end]))

                analogy['most_x'] = most_x
                analogy['most_y'] = most_y
                analogy['match'] = ((word_x == most_x)
                                    and (word_y == most_y))

            analogies.append(analogy)

        columns = [positive_end, negative_end, 'distance', 'score']

        if unrestricted:
            columns.extend(['most_x', 'most_y', 'match'])

        # Passing ``columns`` keeps the expected columns
        # even when no analogy was generated.
        return pd.DataFrame(analogies, columns=columns)
612
613
    def calc_direct_bias(self, neutral_words, c=None):
614
        """Calculate the direct bias.
615
616
        Based on the projection of neutral words on the direction.
617
618
        :param list neutral_words: List of neutral words
619
        :param c: Strictness of bias measuring
620
        :type c: float or None
621
        :return: The direct bias
622
        """
623
624
        if c is None:
625
            c = 1
626
627
        projections = self._calc_projection_scores(neutral_words)['projection']
628
        direct_bias_terms = np.abs(projections) ** c
629
        direct_bias = direct_bias_terms.sum() / len(neutral_words)
630
631
        return direct_bias
632
633
    def calc_indirect_bias(self, word1, word2):
634
        """Calculate the indirect bias between two words.
635
636
        Based on the amount of shared projection of the words on the direction.
637
638
        Also called PairBias.
639
        :param str word1: First word
640
        :param str word2: Second word
641
        :type c: float or None
642
        :return The indirect bias between the two words
643
        """
644
645
        self._is_direction_identified()
646
647
        vector1 = normalize(self[word1])
648
        vector2 = normalize(self[word2])
649
650
        perpendicular_vector1 = reject_vector(vector1, self.direction)
651
        perpendicular_vector2 = reject_vector(vector2, self.direction)
652
653
        inner_product = vector1 @ vector2
654
        perpendicular_similarity = cosine_similarity(perpendicular_vector1,
655
                                                     perpendicular_vector2)
656
657
        indirect_bias = ((inner_product - perpendicular_similarity)
658
                         / inner_product)
659
        return indirect_bias
660
661
    def generate_closest_words_indirect_bias(self,
662
                                             neutral_positive_end,
663
                                             neutral_negative_end,
664
                                             words=None, n_extreme=5):
665
        """
666
        Generate closest words to a neutral direction and their indirect bias.
667
668
        The direction of the neutral words is used to find
669
        the most extreme words.
670
        The indirect bias is calculated between the most extreme words
671
        and the closest end.
672
673
        :param str neutral_positive_end: A word that define the positive side
674
                                         of the neutral direction.
675
        :param str neutral_negative_end: A word that define the negative side
676
                                         of the neutral direction.
677
        :param list words: List of words to project on the neutral direction.
678
        :param int n_extreme: The number for the most extreme words
679
                              (positive and negative) to show.
680
        :return: Data Frame of the most extreme words
681
                 with their projection scores and indirect biases.
682
        """
683
684
        neutral_direction = normalize(self[neutral_positive_end]
685
                                      - self[neutral_negative_end])
686
687
        vectors = [normalize(self[word]) for word in words]
688
        df = (pd.DataFrame([{'word': word,
689
                             'projection': vector @ neutral_direction}
690
                            for word, vector in zip(words, vectors)])
691
              .sort_values('projection', ascending=False))
692
693
        df = take_two_sides_extreme_sorted(df, n_extreme,
694
                                           'end',
695
                                           neutral_positive_end,
696
                                           neutral_negative_end)
697
698
        df['indirect_bias'] = df.apply(lambda r:
699
                                       self.calc_indirect_bias(r['word'],
700
                                                               r['end']),
701
                                       axis=1)
702
703
        df = df.set_index(['end', 'word'])
704
        df = df[['projection', 'indirect_bias']]
705
706
        return df
707
708
    def _extract_neutral_words(self, specific_words):
709
        extended_specific_words = set()
710
711
        # because or specific_full data was trained on partial word embedding
712
        for word in specific_words:
713
            extended_specific_words.add(word)
714
            extended_specific_words.add(word.lower())
715
            extended_specific_words.add(word.upper())
716
            extended_specific_words.add(word.title())
717
718
        neutral_words = [word for word in self.model.vocab
719
                         if word not in extended_specific_words]
720
721
        return neutral_words
722
723
    def _neutralize(self, neutral_words):
724
        self._is_direction_identified()
725
726
        if self._verbose:
727
            neutral_words_iter = tqdm(neutral_words)
728
        else:
729
            neutral_words_iter = iter(neutral_words)
730
731
        for word in neutral_words_iter:
732
            neutralized_vector = reject_vector(self[word],
733
                                               self.direction)
734
            update_word_vector(self.model, word, neutralized_vector)
735
736
        self.model.init_sims(replace=True)
737
738
    def _equalize(self, equality_sets):
        """Equalize the words of every equality set around their center.

        For each set, the word vectors are normalized and averaged
        into a center, which is split by ``project_reject_vector``
        into its projected and rejected parts with respect to
        ``self.direction``. Each word vector is then replaced by
        the rejected center plus a scaled, normalized offset of its own
        projection from the projected center.

        In verbose mode, a table with the scalars of every
        equalized word is printed.
        Finally, ``init_sims(replace=True)`` is called on the model.

        :param equality_sets: Iterable of equality sets (sequences of words).
        """
        # pylint: disable=R0914

        self._is_direction_identified()

        # Bound unconditionally (but filled and printed only in
        # verbose mode), so it can never be referenced while undefined.
        words_data = []

        for equality_set_index, equality_set_words in enumerate(equality_sets):
            equality_set_vectors = [normalize(self[word])
                                    for word in equality_set_words]
            center = np.mean(equality_set_vectors, axis=0)
            (projected_center,
             rejected_center) = project_reject_vector(center,
                                                      self.direction)
            scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)

            for word, vector in zip(equality_set_words, equality_set_vectors):
                projected_vector = project_vector(vector, self.direction)

                projected_part = normalize(projected_vector - projected_center)

                # In the code it is different of Bolukbasi
                # It behaves the same only for equality_sets
                # with size of 2 (pairs) - not sure!
                # However, my code is the same as the article
                # equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # For pairs, projected_part_vector1 == -projected_part_vector2,
                # and this is the same as
                # projected_part_vector1 == self.direction
                equalized_vector = rejected_center + scaling * projected_part

                update_word_vector(self.model, word, equalized_vector)

                if self._verbose:
                    words_data.append({
                        'equality_set_index': equality_set_index,
                        'word': word,
                        'scaling': scaling,
                        'projected_scalar': vector @ self.direction,
                        'equalized_projected_scalar': (equalized_vector
                                                       @ self.direction),
                    })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for 1-dim bias space (direction):')
            words_data_df = (pd.DataFrame(words_data)
                             .set_index(['equality_set_index', 'word']))
            print(tabulate(words_data_df, headers='keys'))

        self.model.init_sims(replace=True)
791
792
    def _generate_pair_candidates(self, pairs):
        """Collect the word-form variants of pairs that exist in the model.

        The forms of the two words of a pair, as generated by
        ``generate_one_word_forms``, are matched positionally.

        :param pairs: Iterable of pairs of words.
        :return: Set of candidate pairs (tuples) whose
                 two words both exist in the model.
        """
        candidates = set()

        for first_word, second_word in pairs:
            form_pairs = zip(generate_one_word_forms(first_word),
                             generate_one_word_forms(second_word))

            for first_form, second_form in form_pairs:
                if first_form in self.model and second_form in self.model:
                    candidates.add((first_form, second_form))

        return candidates
799
800
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding.

        :param str method: The method of debiasing,
                           one of ``DEBIAS_METHODS``.
        :param list neutral_words: List of neutral words
                                   for the neutralize step
        :param list equality_sets: List of equality sets,
                                   for the equalize step.
                                   The sets represent the direction.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one
        :return: ``None`` if ``inplace`` is true,
                 otherwise the new debiased object.
        :raises ValueError: If ``method`` is not one of ``DEBIAS_METHODS``.
        :raises AssertionError: If ``method`` is ``'hard'`` and
                                an equality set is not a pair.

        .. warning::

          After calling `debias`,
          all the vectors of the word embedding
          will be normalized to unit length.

        """

        # pylint: disable=W0212

        # All the arguments are validated up front: before the expensive
        # deep copy, and before any vector is modified, so an invalid
        # call never leaves the embedding only partially debiased.
        if method not in DEBIAS_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DEBIAS_METHODS, method))

        if method == 'hard':
            assert all(len(equality_set) == 2
                       for equality_set in equality_sets), \
                   'Currently supporting only equality pairs.'

        bias_word_embedding = self if inplace else copy.deepcopy(self)

        if method in ['hard', 'neutralize']:
            if bias_word_embedding._verbose:
                print('Neutralize...')
            bias_word_embedding._neutralize(neutral_words)

        if method == 'hard':
            if bias_word_embedding._verbose:
                print('Equalize...')

            equality_sets = (bias_word_embedding
                             ._generate_pair_candidates(equality_sets))

            bias_word_embedding._equalize(equality_sets)

        return None if inplace else bias_word_embedding
852
853
    def evaluate_word_embedding(self,
                                kwargs_word_pairs=None,
                                kwargs_word_analogies=None):
        """
        Evaluate word pairs tasks and word analogies tasks.

        The evaluation is done on the word embedding
        of this object (``self.model``).

        :param kwargs_word_pairs: Kwargs for
                                  evaluate_word_pairs
                                  method.
        :type kwargs_word_pairs: dict or None
        :param kwargs_word_analogies: Kwargs for
                                      evaluate_word_analogies
                                      method.
        :type kwargs_word_analogies: dict or None
        :return: Tuple of :class:`pandas.DataFrame`
                 for the evaluation results.
        """

        # The name below resolves to the module-level function,
        # not to this method.
        return evaluate_word_embedding(self.model,
                                       kwargs_word_pairs,
                                       kwargs_word_analogies)
875
876
    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific words.

        Using Linear SVM.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training
                                              (``MAX_NON_SPECIFIC_EXAMPLES``
                                              if ``None``)
        :param bool debug: Whether to return also the training data
                           (``False`` if ``None``)
        :return: List of learned specific words and the classifier object;
                 if ``debug`` is true, also the training ``X`` and ``y``
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        # Built once: testing membership in a list would cost a full
        # scan of the seed words for every word of the vocabulary.
        seed_specific_words = frozenset(seed_specific_words)

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_specific_words

            if not is_specific:
                # Only the first `max_non_specific_examples`
                # non-specific words are used as negative examples
                non_specific_example_count += 1
                if non_specific_example_count > max_non_specific_examples:
                    continue

            data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y
932
933
    def _plot_most_biased_one_cluster(self,
                                      most_biased_neutral_words, y_bias,
                                      random_state=1, ax=None):
        """Plot clustering as classification for the given words.

        :param most_biased_neutral_words: Words whose vectors are
                                          taken from ``self.model``.
        :param y_bias: Labels of the words, in the same order.
        :param random_state: Passed on to
                             ``plot_clustering_as_classification``.
        :param ax: Passed on to ``plot_clustering_as_classification``.
        :return: The result of ``plot_clustering_as_classification``.
        """
        vectors = []
        for biased_word in most_biased_neutral_words:
            vectors.append(self.model[biased_word])

        return plot_clustering_as_classification(vectors, y_bias,
                                                 random_state=random_state,
                                                 ax=ax)
943
944
    def compute_factual_association(self, factual_properity):
        """Compute association of a factual property to the projection.

        Inspired by WEFAT (Word-Embedding Factual Association Test),
        but it is not the same:
        - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
        `Semantics derived automatically
        from language corpora contain human-like biases
        <http://opus.bath.ac.uk/55288/>`_.
        Science, 356(6334), 183-186.

        In a future version, the WEFAT will also be implemented.

        If a word doesn't exist in the word embedding,
        then it will be filtered out.

        For example, in :class:`responsibly.we.bias.GenderBiasWE`,
        the default factual property is the percentage of female
        in various occupations
        from the Labor Force Statistics of 2017 Population Survey,
        Taken from: https://arxiv.org/abs/1804.06876

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :return: Pearson r, pvalue and the words with their
                 associated factual values
                 and their projection on the bias direction.
        :raises ValueError: If none of the words exist
                            in the word embedding.
        """

        points = {word: (value, self.project_on_direction(word))
                  for word, value in factual_properity.items()
                  if word in self.model}

        # Without this guard, unpacking the empty zip below fails
        # with an unrelated "not enough values to unpack" message.
        if not points:
            raise ValueError('None of the words of the factual property '
                             'exist in the word embedding.')

        x, y = zip(*points.values())

        return pearsonr(x, y), points
980
981
    def plot_factual_association(self, factual_properity, ax=None):
        """Plot association of a factual property to the projection.

        See: :meth:`BiasWordEmbedding.compute_factual_association`

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :param ax: Axes to draw on. If ``None``,
                   a new figure with a single axes is created.
        :return: The axes of the plot.
        """

        result = self.compute_factual_association(factual_properity)

        (r, pvalue), points = result
        x, y = zip(*points.values())

        if ax is None:
            _, ax = plt.subplots(1)

        ax.scatter(x, y)

        # The title and the labels are set on `ax` itself: the pyplot
        # functions act on the *current* axes, which is not
        # necessarily the one given by the caller.
        ax.set_title('Association between Factual Property '
                     'and Projection on Direction '
                     '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                     .format(r, pvalue))

        ax.set_xlabel('Factual Property')
        ax.set_ylabel('Projection on Direction')

        return ax
1009
1010
    @staticmethod
    def plot_most_biased_clustering(biased, debiased,
                                    seed='ends', n_extreme=500,
                                    random_state=1):
        """Plot clustering as classification of biased neutral words.

        :param biased: Biased word embedding of
                       :class:`~responsibly.we.bias.BiasWordEmbedding`.
        :param debiased: Debiased word embedding of
                         :class:`~responsibly.we.bias.BiasWordEmbedding`.
        :param seed: The definition of the seed vector.
                    Either by a tuple of two word ends,
                    or by `'ends'` for the pre-defined ends
                    or by `'direction'` for
                    the pre-defined direction vector.
        :param n_extreme: The number of extreme biased
                          neutral words to use.
        :param random_state: Random state passed on to the clustering
                             of both word embeddings.
        :return: Tuple of the axes objects of the plot,
                 and a dictionary with the most positive
                 and negative words.

        Based on:

        - Gonen, H., & Goldberg, Y. (2019).
          `Lipstick on a Pig:
          Debiasing Methods Cover up Systematic Gender Biases
          in Word Embeddings But do not Remove
          Them <https://arxiv.org/abs/1903.03862>`_.
          arXiv preprint arXiv:1903.03862.

        - https://github.com/gonenhila/gender_bias_lipstick
        """
        # pylint: disable=protected-access,too-many-locals,line-too-long

        assert biased.positive_end == debiased.positive_end, \
            'Postive ends should be the same.'
        assert biased.negative_end == debiased.negative_end, \
            'Negative ends should be the same.'

        seed_vector, _, _ = get_seed_vector(seed, biased)

        # Sorted ascending by the projection on the seed vector
        # (the word itself breaks ties)
        neutral_words = biased._data['neutral_words']
        neutral_word_projections = sorted(
            (normalize(biased[word]) @ seed_vector, word)
            for word in neutral_words)

        _, most_negative_words = zip(*neutral_word_projections[:n_extreme])
        _, most_positive_words = zip(*neutral_word_projections[-n_extreme:])

        most_biased_neutral_words = most_negative_words + most_positive_words

        # The labels follow the number of words that were actually
        # selected, which is smaller than `n_extreme` when there are
        # fewer neutral words than that.
        y_bias = ([False] * len(most_negative_words)
                  + [True] * len(most_positive_words))

        _, axes = plt.subplots(1, 2, figsize=(20, 5))

        acc_biased = biased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                          y_bias,
                                                          random_state=random_state,
                                                          ax=axes[0])
        axes[0].set_title('Biased - Accuracy={}'.format(acc_biased))

        acc_debiased = debiased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                              y_bias,
                                                              random_state=random_state,
                                                              ax=axes[1])
        axes[1].set_title('Debiased - Accuracy={}'.format(acc_debiased))

        return axes, {biased.positive_end: most_positive_words,
                      biased.negative_end: most_negative_words}
1083
1084
1085
class GenderBiasWE(BiasWordEmbedding):
1086
    """Measure and adjust the Gender Bias in English Word Embedding.
1087
1088
    :param model: Word embedding model of ``gensim.model.KeyedVectors``
1089
    :param bool only_lower: Whether the word embedding contains
1090
                            only lower case words
1091
    :param bool verbose: Set verbosity
1092
    :param str identify_direction: Set the method of identifying
1093
                                   the gender direction:
1094
                                   `'single'`, `'sum'` or `'pca'`.
1095
    :param bool to_normalize: Whether to normalize all the vectors
1096
                              (recommended!)
1097
    """
1098
1099
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction='pca', to_normalize=True):
        """Initialize the data and, optionally, identify the gender direction.

        The parameters are documented in the class docstring.
        If ``identify_direction`` is falsy,
        the direction is not identified.
        """
        # `to_normalize` is passed through as given by the caller
        # (it used to be overridden by a hard-coded `True`).
        super().__init__(model=model,
                         only_lower=only_lower,
                         verbose=verbose,
                         to_normalize=to_normalize)
        self._initialize_data()

        if identify_direction:
            definitional = None

            if identify_direction == 'single':
                definitional = ('she', 'he')
            elif identify_direction == 'sum':
                # Regroup the pairs into one group of words per pair position
                definitional = list(zip(*self._data['definitional_pairs']))
            elif identify_direction == 'pca':
                definitional = self._data['definitional_pairs']

            self._identify_direction('she', 'he',
                                     definitional,
                                     identify_direction)
1120
1121
    def _initialize_data(self):
        """Build ``self._data`` from a copy of the Bolukbasi gender data.

        The word groups are filtered by the model, and the
        sorted ``'neutral_words'`` group is added to the data.
        """
        specific_key = 'specific_full_with_definitional_equalize'

        # `data` and `self._data` refer to the same dictionary
        data = copy.deepcopy(BOLUKBASI_DATA['gender'])
        self._data = data

        if not self.only_lower:
            data[specific_key] = generate_words_forms(data[specific_key])

        for group_key in data['word_group_keys']:
            data[group_key] = self._filter_words_by_model(data[group_key])

        neutral_words = self._extract_neutral_words(data[specific_key])
        data['neutral_words'] = neutral_words
        neutral_words.sort()
        data['word_group_keys'].append('neutral_words')
1137
1138
    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scores, by default of the profession names.

        :param words: The words to plot, or ``'professions'``
                      for the profession names of the data.

        The other arguments are passed on unchanged
        to the parent implementation, whose result is returned.
        """
        # The type is checked first (as in `calc_direct_bias`), because
        # comparing an array-like of words with a string
        # does not yield a single boolean.
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)
1145
1146
    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        """Plot the projections distribution, by default of the data groups.

        :param word_groups: The word groups to plot, or ``'bolukbasi'``
                            for all the word groups listed
                            in ``'word_group_keys'`` of the data.
        :param ax: Passed on to the parent implementation.
        :return: The result of the parent implementation.
        """
        if word_groups == 'bolukbasi':
            group_keys = self._data['word_group_keys']
            word_groups = {}
            for group_key in group_keys:
                word_groups[group_key] = self._data[group_key]

        parent = super()
        return parent.plot_dist_projections_on_direction(word_groups, ax)
1153
1154
    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         ax=None, scatter_kwargs=None):
        """Plot the bias across word embeddings for the neutral professions.

        The words are the ``'neutral_profession_names'``
        of the Bolukbasi gender data; the other arguments are passed on
        unchanged to the parent implementation.

        :return: Whatever the parent implementation returns.
        """
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']

        # The zero-argument `super()` starts the lookup after *this*
        # class. `super(cls, cls)` starts it after `cls`, so a call
        # on a subclass would resolve back to this very method
        # and recurse without end.
        return super().plot_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words,
                                                        ax,
                                                        scatter_kwargs)
1164
1165
    def calc_direct_bias(self, neutral_words='professions', c=None):
        """Calculate the direct bias, by default of the neutral professions.

        :param neutral_words: The neutral words to use, or
                              ``'professions'`` for the neutral
                              profession names of the data.
        :param c: Passed on to the parent implementation.
        :return: The result of the parent implementation.
        """
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            neutral_words = self._data['neutral_profession_names']

        # `c` is passed on for every kind of `neutral_words`
        # (it used to be dropped for custom words).
        return super().calc_direct_bias(neutral_words, c)
1171
1172
    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        """Generate the closest words and their indirect bias.

        :param words: The words to use, or ``'professions'``
                      for the profession names of the data.

        The other arguments are passed on unchanged
        to the parent implementation, whose result is returned.
        """
        # pylint: disable=C0301

        # The type is checked first (as in `calc_direct_bias`), because
        # comparing an array-like of words with a string
        # does not yield a single boolean.
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                            neutral_negative_end,
                                                            words,
                                                            n_extreme=n_extreme)
1185
1186
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias with defaults taken from the gender data.

        If not given, the neutral words (for the ``'hard'`` and
        ``'neutralize'`` methods) are the ``'neutral_words'`` of the data,
        and the equality sets (for the ``'hard'`` method) are
        the ``'equalize_pairs'`` together with
        the ``'definitional_pairs'`` of the data.

        :return: The result of the parent implementation.
        """
        # pylint: disable=line-too-long
        if method in ['hard', 'neutralize'] and neutral_words is None:
            neutral_words = self._data['neutral_words']

        if method == 'hard' and equality_sets is None:
            pair_groups = (self._data['equalize_pairs'],
                           self._data['definitional_pairs'])
            equality_sets = {tuple(pair)
                             for pairs in pair_groups
                             for pair in pairs}

        debiased = super().debias(method, neutral_words, equality_sets,
                                  inplace)
        return debiased
1199
1200
    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        """Learn specific words, by default from the seed words of the data.

        :param seed_specific_words: The seed specific words, or
                                    ``'bolukbasi'`` for
                                    the ``'specific_seed'`` of the data.

        The other arguments are passed on unchanged
        to the parent implementation, whose result is returned.
        """
        # The type is checked first (as in `calc_direct_bias`), because
        # comparing an array-like of words with a string
        # does not yield a single boolean.
        if (isinstance(seed_specific_words, str)
                and seed_specific_words == 'bolukbasi'):
            seed_specific_words = self._data['specific_seed']

        return super().learn_full_specific_words(seed_specific_words,
                                                 max_non_specific_examples,
                                                 debug)
1209
1210
    def compute_factual_association(self,
                                    factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
        """Compute the factual association, by default of the occupations.

        :param dict factual_properity: Dictionary of words and their
                                       factual values; defaults to
                                       ``OCCUPATION_FEMALE_PRECENTAGE``.
        :return: The result of the parent implementation.
        """
        association = super().compute_factual_association(factual_properity)
        return association
1213
1214
    def plot_factual_association(self,
                                 factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                                 ax=None):
        """Plot the factual association, by default of the occupations.

        :param dict factual_properity: Dictionary of words and their
                                       factual values; defaults to
                                       ``OCCUPATION_FEMALE_PRECENTAGE``.
        :param ax: Passed on to the parent implementation.
        :return: The result of the parent implementation.
        """
        plotted = super().plot_factual_association(factual_properity, ax)
        return plotted