Passed
Push — master ( deaccc...bef728 )
by Shlomi
01:50
created

ethically.we.bias.GenderBiasWE.debias()   A

Complexity

Conditions 5

Size

Total Lines 13
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 13
rs 9.3333
c 0
b 0
f 0
cc 5
nop 5
1
# pylint: disable=too-many-lines
2
"""
3
Measuring and adjusting bias in word embedding by Bolukbasi (2016).
4
5
References:
6
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
7
      & Kalai, A. T. (2016).
8
      `Man is to computer programmer as woman is to homemaker?
9
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
10
      In Advances in neural information processing systems
11
      (pp. 4349-4357).
12
13
    - The code and data are based on the GitHub repository:
14
      https://github.com/tolga-b/debiaswe (MIT License).
15
16
    - Gonen, H., & Goldberg, Y. (2019).
17
      `Lipstick on a Pig:
18
      Debiasing Methods Cover up Systematic Gender Biases
19
      in Word Embeddings But do not Remove Them
20
      <https://arxiv.org/abs/1903.03862>`_.
21
      arXiv preprint arXiv:1903.03862.
22
23
    - Nissim, M., van Noord, R., van der Goot, R. (2019).
24
      `Fair is Better than Sensational: Man is to Doctor
25
      as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.
26
27
Usage
28
~~~~~
29
30
.. code:: python
31
32
   >>> from ethically.we import GenderBiasWE
33
   >>> from gensim import downloader
34
   >>> w2v_model = downloader.load('word2vec-google-news-300')
35
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
36
   >>> w2v_gender_bias_we.calc_direct_bias()
37
   0.07307904249481942
38
   >>> w2v_gender_bias_we.debias()
39
   >>> w2v_gender_bias_we.calc_direct_bias()
40
   1.7964246601064155e-09
41
42
Types of Bias
43
~~~~~~~~~~~~~
44
45
Direct Bias
46
^^^^^^^^^^^
47
48
1. Associations
49
    Words that are closer to one end (e.g., *he*) than to
50
    the other end (*she*).
51
    For example, occupational stereotypes (page 7).
52
    Calculated by
53
    :meth:`~ethically.we.bias.BiasWordEmbedding.calc_direct_bias`.
54
55
2. Analogies
56
    Analogies of *he:x::she:y*.
57
    For example analogies exhibiting stereotypes (page 7).
58
    Generated by
59
    :meth:`~ethically.we.bias.BiasWordEmbedding.generate_analogies`.
60
61
62
Indirect Bias
63
^^^^^^^^^^^^^
64
65
The projection of a neutral word onto a direction defined by two neutral words
66
is explained in a great portion by a shared bias direction projection.
67
68
Calculated by
69
:meth:`~ethically.we.bias.BiasWordEmbedding.calc_indirect_bias`
70
and
71
:meth:`~ethically.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.
72
73
"""
74
75
import copy
76
import warnings
77
78
import matplotlib.pylab as plt
79
import numpy as np
80
import pandas as pd
81
import seaborn as sns
82
from scipy.stats import pearsonr, spearmanr
83
from sklearn.decomposition import PCA
84
from sklearn.metrics.pairwise import euclidean_distances
85
from sklearn.svm import LinearSVC
86
from tqdm import tqdm
87
88
from ethically.consts import RANDOM_STATE
89
from ethically.utils import _warning_setup
90
from ethically.we.benchmark import evaluate_word_embedding
91
from ethically.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE
92
from ethically.we.utils import (
93
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
94
    generate_words_forms, get_seed_vector, most_similar, normalize,
95
    plot_clustering_as_classification, project_params, project_reject_vector,
96
    project_vector, reject_vector, round_to_extreme,
97
    take_two_sides_extreme_sorted, update_word_vector,
98
)
99
from tabulate import tabulate
100
101
102
# Accepted values for the ``method`` argument of
# ``BiasWordEmbedding._identify_direction``.
DIRECTION_METHODS = ['single', 'sum', 'pca']
103
# NOTE(review): not referenced in the visible part of this file;
# presumably the accepted debiasing method names -- confirm against
# the ``debias`` implementation.
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
104
# Minimal explained variance ratio required from the first principal
# component when the direction is identified with the ``'pca'`` method.
FIRST_PC_THRESHOLD = 0.5
105
# NOTE(review): not referenced in the visible part of this file;
# presumably an upper bound on sampled non-specific words -- confirm
# against its usage further down.
MAX_NON_SPECIFIC_EXAMPLES = 1000
106
107
# Names exported by ``from <module> import *``.
# NOTE(review): ``GenderBiasWE`` is not defined in the visible part of
# this file -- presumably defined further down; confirm.
__all__ = ['GenderBiasWE', 'BiasWordEmbedding']
108
109
# Executed at import time. The helper comes from ``ethically.utils``;
# its behavior is not shown here -- presumably it configures the
# ``warnings`` filters; confirm.
_warning_setup()
110
111
112
class BiasWordEmbedding:
113
    """Measure and adjust a bias in English word embedding.
114
115
    :param model: Word embedding model of ``gensim.model.KeyedVectors``
116
    :param bool only_lower: Whether the word embedding contains
117
                            only lower case words
118
    :param bool verbose: Set verbosity
119
    :param bool to_normalize: Whether to normalize all the vectors
120
                              (recommended!)
121
    """
122
123
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        """Wrap a word embedding model for bias measurement.

        :param model: Word embedding model, validated with
                      ``assert_gensim_keyed_vectors``
        :param bool only_lower: Stored as ``self.only_lower``
        :param bool verbose: Stored as ``self._verbose``
        :param identify_direction: Must be ``False`` for a direct
                                   instance of this class; it exists only
                                   so sub-classes share this interface
        :param bool to_normalize: When truthy,
                                  ``model.init_sims(replace=True)``
                                  is called
        :raises ValueError: If ``identify_direction`` is not ``False``
                            for a direct instance of this class
        """
        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # identify_direction doesn't have any meaning
        # for the class BiasWordEmbedding
        # The goal is to force this interface of sub-classes.
        # Inside a method, the bare name ``__class__`` refers to the class
        # in which the method is defined (here: this very class).
        direction_requested = identify_direction is not False
        if direction_requested and self.__class__ == __class__:
            message = ('identify_direction must be False'
                       ' for an instance of {}').format(__class__)
            raise ValueError(message)

        self.model = model
        # TODO: write unitest for when it is False
        self.only_lower = only_lower
        self._verbose = verbose

        # Filled in later, once a direction is identified.
        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if to_normalize:
            self.model.init_sims(replace=True)
150
151
    def __copy__(self):
        """Return a shallow copy that shares the underlying model.

        A new instance of the same class is built around the *same*
        ``self.model`` object; the direction and both ends are deep-copied.
        """
        # NOTE(review): the constructor runs with its default
        # ``to_normalize`` value, so ``init_sims`` may be invoked again
        # on the shared model -- confirm this is intended.
        duplicate = self.__class__(self.model,
                                   self.only_lower,
                                   self._verbose,
                                   identify_direction=False)

        for attribute in ('direction', 'positive_end', 'negative_end'):
            setattr(duplicate, attribute,
                    copy.deepcopy(getattr(self, attribute)))

        return duplicate
160
161
    def __deepcopy__(self, memo):
        """Return a copy whose model is deep-copied as well.

        A shallow copy is created first (see ``__copy__``) and then its
        model is replaced by a deep copy. ``memo`` is accepted for the
        ``copy`` protocol but is not used.
        """
        duplicate = copy.copy(self)
        independent_model = copy.deepcopy(duplicate.model)
        duplicate.model = independent_model
        return duplicate
165
166
    def __getitem__(self, key):
167
        return self.model[key]
168
169
    def __contains__(self, item):
170
        return item in self.model
171
172
    def _filter_words_by_model(self, words):
173
        return [word for word in words if word in self]
174
175
    def _is_direction_identified(self):
176
        if self.direction is None:
177
            raise RuntimeError('The direction was not identified'
178
                               ' for this {} instance'
179
                               .format(self.__class__.__name__))
180
181
    # There is a mistake in the article
182
    # it is written (section 5.1):
183
    # "To identify the gender subspace, we took the ten gender pair difference
184
    # vectors and computed its principal components (PCs)"
185
    # however in the source code:
186
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
187
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        """Fit a PCA on the centered vectors of the definitional pairs.

        For each pair, both word vectors are normalized and the offset of
        each one from the pair's midpoint becomes a row of the matrix the
        PCA is fitted on.

        :param definitional_pairs: Iterable of two-word pairs
        :param int n_components: Number of principal components
        :return: The fitted :class:`sklearn.decomposition.PCA` object
        """
        centered_rows = []

        for first_word, second_word in definitional_pairs:
            first_vector = normalize(self[first_word])
            second_vector = normalize(self[second_word])
            midpoint = (first_vector + second_vector) / 2
            centered_rows.extend([first_vector - midpoint,
                                  second_vector - midpoint])

        pca = PCA(n_components=n_components)
        pca.fit(centered_rows)

        if not self._verbose:
            return pca

        # Verbose mode: report how much variance each component explains.
        print(tabulate(enumerate(pca.explained_variance_ratio_, start=1),
                       headers=['Principal Component',
                                'Explained Variance Ratio']))
        return pca
209
210
    # TODO: add the SVD method from section 6 step 1
211
    # It seems there is a mistake there, I think it is the same as PCA
212
    # just with replacing it with SVD
213
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        """Identify the bias direction and store it on the instance.

        Sets ``self.direction``, ``self.positive_end``
        and ``self.negative_end``.

        :param str positive_end: Word marking the positive side
        :param str negative_end: Word marking the negative side
        :param definitional: Interpreted according to ``method``:
                             for ``'single'`` a pair of words,
                             for ``'sum'`` a pair of word groups,
                             for ``'pca'`` an iterable of word pairs
        :param str method: One of ``DIRECTION_METHODS``
        :raises ValueError: If ``method`` is unknown or both ends
                            are the same
        :raises RuntimeError: With ``'pca'``, if the explained variance
                              ratio of the first principal component
                              is below ``FIRST_PC_THRESHOLD``
        """
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end'
                             ' should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The Explained variance'
                                   ' of the first principal component'
                                   ' should be at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # if direction is opposite (e.g. we cannot control
            # what the PCA will return)
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end
264
265
    def project_on_direction(self, word):
266
        """Project the normalized vector of the word on the direction.
267
268
        :param str word: The word tor project
269
        :return float: The projection scalar
270
        """
271
272
        self._is_direction_identified()
273
274
        vector = self[word]
275
        projection_score = self.model.cosine_similarities(self.direction,
276
                                                          [vector])[0]
277
        return projection_score
278
279
    def _calc_projection_scores(self, words):
280
        self._is_direction_identified()
281
282
        df = pd.DataFrame({'word': words})
283
284
        # TODO: maybe using cosine_similarities on all the vectors?
285
        # it might be faster
286
        df['projection'] = df['word'].apply(self.project_on_direction)
287
        df = df.sort_values('projection', ascending=False)
288
289
        return df
290
291
    def calc_projection_data(self, words):
        """
        Calculate projection, projected and rejected vectors of a words list.

        Each word's vector is normalized and then decomposed relative to
        ``self.direction`` with ``project_params``.

        :param list words: List of words
        :return: :class:`pandas.DataFrame` of the projection,
                 projected and rejected vectors of the words list
                 (the ``vector`` column holds the original vector)
        :raises RuntimeError: If the direction was not identified
        """
        # Validate once up front instead of computing (and discarding)
        # a cosine projection for every single word.
        self._is_direction_identified()

        projection_data = []
        for word in words:
            vector = self[word]

            (projection,
             projected_vector,
             rejected_vector) = project_params(normalize(vector),
                                               self.direction)

            projection_data.append({'word': word,
                                    'vector': vector,
                                    'projection': projection,
                                    'projected_vector': projected_vector,
                                    'rejected_vector': rejected_vector})

        return pd.DataFrame(projection_data)
317
318
    def plot_projection_scores(self, words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.

        :param list words: The words to project
        :param int or None n_extreme: The number of extreme words to show;
                                      ``None`` shows all the words
        :param ax: Matplotlib axes to draw on; a new figure is created
                   when ``None``
        :param axis_projection_step: Distance between the x-axis ticks;
                                     defaults to 0.1 when ``None``
        :type axis_projection_step: float or None
        :return: The ax object of the plot
        :raises RuntimeError: If the direction was not identified
        """

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection'] = projections_df['projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        # Shift the projections by 0.5 before mapping them to colors,
        # so that a projection of 0 hits the middle of the colormap.
        cmap = plt.get_cmap('RdBu')
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))

        most_extreme_projection = (projections_df['projection']
                                   .abs()
                                   .max()
                                   .round(1))

        # Everything is drawn explicitly on ``ax`` so that a caller-supplied
        # axes object is honored even when it is not the current one.
        sns.barplot(x='projection', y='word', data=projections_df,
                    palette=projections_df['color'], ax=ax)

        ax.set_xticks(np.arange(-most_extreme_projection,
                                most_extreme_projection
                                + axis_projection_step,
                                axis_projection_step))
        ax.set_title('← {} {} {} →'.format(self.negative_end,
                                           ' ' * 20,
                                           self.positive_end))

        ax.set_xlabel('Direction Projection')
        ax.set_ylabel('Words')

        return ax
365
366
    def plot_dist_projections_on_direction(self, word_groups, ax=None):
        """Plot the projection scalars distribution on the direction.

        One density curve is drawn per group, in sorted order of the
        group names.

        :param dict word_groups: The groups to project,
                                 group names as keys and words as values
        :param ax: Matplotlib axes to draw on; a new figure is created
                   when ``None``
        :return: The ax object of the plot
        :raises RuntimeError: If the direction was not identified
        """

        self._is_direction_identified()

        if ax is None:
            _, ax = plt.subplots(1)

        names = sorted(word_groups.keys())

        for name in names:
            words = word_groups[name]
            label = '{} (#{})'.format(name, len(words))
            vectors = [self[word] for word in words]
            projections = self.model.cosine_similarities(self.direction,
                                                         vectors)
            sns.distplot(projections, hist=False, label=label, ax=ax)

        # Everything is drawn explicitly on ``ax`` so that a caller-supplied
        # axes object is honored even when it is not the current one.
        ax.axvline(0, color='k', linestyle='--')

        ax.set_title('← {} {} {} →'.format(self.negative_end,
                                           ' ' * 20,
                                           self.positive_end))
        ax.set_xlabel('Direction Projection')
        ax.set_ylabel('Density')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        return ax
396
397
    @classmethod
398
    def _calc_bias_across_word_embeddings(cls,
399
                                          word_embedding_bias_dict,
400
                                          words):
401
        """
402
        Calculate to projections and rho of words for two word embeddings.
403
404
        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
405
                                               as values,
406
                                               and their names as keys.
407
        :param list words: Words to be projected.
408
        :return tuple: Projections and spearman rho.
409
        """
410
        # pylint: disable=W0212
411
        assert len(word_embedding_bias_dict) == 2, 'Support only in two'\
412
                                                    'word embeddings'
413
414
        intersection_words = [word for word in words
415
                              if all(word in web
416
                                     for web in (word_embedding_bias_dict
417
                                                 .values()))]
418
419
        projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301
420
                       for name, web in word_embedding_bias_dict.items()}
421
422
        df = pd.DataFrame(projections)
423
        df.index = intersection_words
424
425
        rho, _ = spearmanr(*df.transpose().values)
426
        return df, rho
427
428
    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         words, ax=None, scatter_kwargs=None):
        """
        Plot the projections of same words of two word embeddings.

        :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :param ax: Matplotlib axes to draw on; a new figure is created
                   when ``None``.
        :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
        :type scatter_kwargs: dict or None
        :return: The ax object of the plot
        """
        # pylint: disable=W0212

        df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                        words)

        if ax is None:
            _, ax = plt.subplots(1)

        if scatter_kwargs is None:
            scatter_kwargs = {}

        name1, name2 = word_embedding_bias_dict.keys()

        # Everything is drawn explicitly on ``ax`` so that a caller-supplied
        # axes object is honored even when it is not the current one.
        ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

        ax.set_title('Bias Across Word Embeddings'
                     ' (Spearman Rho = {:0.2f})'.format(rho))

        # The ends of the first embedding label both axes.
        negative_end = word_embedding_bias_dict[name1].negative_end
        positive_end = word_embedding_bias_dict[name1].positive_end
        ax.set_xlabel('← {}     {}     {} →'.format(negative_end,
                                                    name1,
                                                    positive_end))
        ax.set_ylabel('← {}     {}     {} →'.format(negative_end,
                                                    name2,
                                                    positive_end))

        # Same limits on both axes, so the two embeddings are comparable.
        ax_min = round_to_extreme(df.values.min())
        ax_max = round_to_extreme(df.values.max())
        ax.set_xlim(ax_min, ax_max)
        ax.set_ylim(ax_min, ax_max)

        return ax
475
476
    # TODO: refactor for speed and clarity
477
    def generate_analogies(self, n_analogies=100, seed='ends',
                           multiple=False,
                           delta=1., restrict_vocab=30000,
                           unrestricted=False):
        """
        Generate analogies based on a seed vector.

        x - y ~ seed vector.
        or a:x::b:y when a-b ~ seed vector.

        The seed vector can be defined by two word ends,
        or by the bias direction.

        ``delta`` is used for semantic coherence. The default value of 1
        corresponds to an angle <= pi/3.

        If fewer than ``n_analogies`` suitable pairs exist, all the pairs
        that were found are returned.

        There is criticism regarding generating analogies
        when used with `unrestricted=False` and not ignoring analogies
        with `match` column equal to `False`.
        Tolga's technique of generating analogies, as implemented in this
        method, is limited inherently to analogies with x != y, which may
        force "fake" bias analogies.

        See:

        - Nissim, M., van Noord, R., van der Goot, R. (2019).
          `Fair is Better than Sensational: Man is to Doctor
          as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_.

        :param seed: The definition of the seed vector.
                     Either by a tuple of two word ends,
                     or by `'ends'` for the pre-defined ends
                     or by `'direction'` for the pre-defined direction vector.
        :param int n_analogies: Number of analogies to generate.
        :param bool multiple: Whether to allow multiple appearances of a word
                              in the analogies.
        :param float delta: Threshold for semantic similarity.
                            The maximal distance between x and y.
        :param int restrict_vocab: The vocabulary size to use.
        :param bool unrestricted: Whether to validate the generated analogies
                                  with unrestricted `most_similar`.
        :return: Data Frame of analogies (x, y), their distances,
                 and their cosine similarity scores
        """
        # pylint: disable=C0301,R0914

        if not unrestricted:
            warnings.warn('Not Using unrestricted most_similar '
                          'may introduce fake biased analogies.')

        (seed_vector,
         positive_end,
         negative_end) = get_seed_vector(seed, self)

        restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

        normalized_vectors = (restrict_vocab_vectors
                              / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

        pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)

        # `pairs_distances` must be not-equal to zero
        # otherwise, x-y will be the zero vector, and every cosine similarity
        # will be equal to zero.
        # This causes the **limitation** of this method, which enforces
        # different words for x and y.
        pairs_mask = (pairs_distances < delta) & (pairs_distances != 0)

        pairs_indices = np.array(np.nonzero(pairs_mask)).T
        x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
        y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

        x_minus_y_vectors = x_vectors - y_vectors
        normalized_x_minus_y_vectors = (x_minus_y_vectors
                                        / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

        cos_distances = normalized_x_minus_y_vectors @ seed_vector

        # Best matching pairs (highest cosine similarity) first.
        sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

        analogies = []
        generated_words_x = set()
        generated_words_y = set()

        # A ``for`` loop (rather than ``while`` + ``next``) ends cleanly
        # when the candidate pairs are exhausted before ``n_analogies``
        # analogies were collected, instead of leaking ``StopIteration``.
        for cos_distance_index in sorted_cos_distances_indices:
            if len(analogies) >= n_analogies:
                break

            pairs_index = pairs_indices[cos_distance_index]
            word_x, word_y = [self.model.index2word[index]
                              for index in pairs_index]

            if not multiple and (word_x in generated_words_x
                                 or word_y in generated_words_y):
                continue

            analogy = {positive_end: word_x,
                       negative_end: word_y,
                       'score': cos_distances[cos_distance_index],
                       'distance': pairs_distances[tuple(pairs_index)]}

            generated_words_x.add(word_x)
            generated_words_y.add(word_y)

            if unrestricted:
                most_x = next(word
                              for word, _ in most_similar(self.model,
                                                          [word_y, positive_end],
                                                          [negative_end]))
                most_y = next(word
                              for word, _ in most_similar(self.model,
                                                          [word_x, negative_end],
                                                          [positive_end]))

                analogy['most_x'] = most_x
                analogy['most_y'] = most_y
                analogy['match'] = ((word_x == most_x)
                                    and (word_y == most_y))

            analogies.append(analogy)

        columns = [positive_end, negative_end, 'distance', 'score']

        if unrestricted:
            columns.extend(['most_x', 'most_y', 'match'])

        # Passing ``columns`` fixes the column order and keeps the frame
        # well-formed even when no analogy was generated.
        return pd.DataFrame(analogies, columns=columns)
609
610
    def calc_direct_bias(self, neutral_words, c=None):
611
        """Calculate the direct bias.
612
613
        Based on the projection of neutral words on the direction.
614
615
        :param list neutral_words: List of neutral words
616
        :param c: Strictness of bias measuring
617
        :type c: float or None
618
        :return: The direct bias
619
        """
620
621
        if c is None:
622
            c = 1
623
624
        projections = self._calc_projection_scores(neutral_words)['projection']
625
        direct_bias_terms = np.abs(projections) ** c
626
        direct_bias = direct_bias_terms.sum() / len(neutral_words)
627
628
        return direct_bias
629
630
    def calc_indirect_bias(self, word1, word2):
        """Calculate the indirect bias between two words.

        Based on the amount of shared projection of the words on the direction.
        It is the relative change between the inner product of the two
        normalized vectors and the cosine similarity of their rejections
        from the direction.

        Also called PairBias.

        :param str word1: First word
        :param str word2: Second word
        :return: The indirect bias between the two words
        :raises RuntimeError: If the direction was not identified
        """

        self._is_direction_identified()

        first, second = (normalize(self[word]) for word in (word1, word2))

        first_rejection = reject_vector(first, self.direction)
        second_rejection = reject_vector(second, self.direction)

        similarity = first @ second
        rejection_similarity = cosine_similarity(first_rejection,
                                                 second_rejection)

        return (similarity - rejection_similarity) / similarity
657
658
    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words=None, n_extreme=5):
        """
        Generate closest words to a neutral direction and their indirect bias.

        The direction of the neutral words is used to find
        the most extreme words.
        The indirect bias is calculated between the most extreme words
        and the closest end.

        :param str neutral_positive_end: A word that defines the positive
                                         side of the neutral direction.
        :param str neutral_negative_end: A word that defines the negative
                                         side of the neutral direction.
        :param list words: List of words to project on the neutral direction.
                           NOTE(review): the ``None`` default is iterated
                           as is in this method, so a list must be passed.
        :param int n_extreme: The number for the most extreme words
                              (positive and negative) to show.
        :return: Data Frame of the most extreme words
                 with their projection scores and indirect biases.
        """

        ends_difference = (self[neutral_positive_end]
                           - self[neutral_negative_end])
        neutral_direction = normalize(ends_difference)

        normalized_vectors = [normalize(self[word]) for word in words]
        records = [{'word': word,
                    'projection': vector @ neutral_direction}
                   for word, vector in zip(words, normalized_vectors)]
        ranked = pd.DataFrame(records).sort_values('projection',
                                                   ascending=False)

        extremes = take_two_sides_extreme_sorted(ranked, n_extreme,
                                                 'end',
                                                 neutral_positive_end,
                                                 neutral_negative_end)

        # Indirect bias of every extreme word against the end in its row.
        extremes['indirect_bias'] = extremes.apply(
            lambda row: self.calc_indirect_bias(row['word'], row['end']),
            axis=1)

        extremes = extremes.set_index(['end', 'word'])
        return extremes[['projection', 'indirect_bias']]
704
705
    def _extract_neutral_words(self, specific_words):
706
        extended_specific_words = set()
707
708
        # because or specific_full data was trained on partial word embedding
709
        for word in specific_words:
710
            extended_specific_words.add(word)
711
            extended_specific_words.add(word.lower())
712
            extended_specific_words.add(word.upper())
713
            extended_specific_words.add(word.title())
714
715
        neutral_words = [word for word in self.model.vocab
716
                         if word not in extended_specific_words]
717
718
        return neutral_words
719
720
    def _neutralize(self, neutral_words):
        """Replace each neutral word vector by its rejection from the direction.

        The model is updated in place, word by word, and afterwards
        ``init_sims(replace=True)`` is called on it.

        :param neutral_words: Iterable of the words to neutralize
        :raises RuntimeError: If the direction was not identified
        """
        self._is_direction_identified()

        # Show a progress bar only in verbose mode.
        words_iter = (tqdm(neutral_words) if self._verbose
                      else iter(neutral_words))

        for word in words_iter:
            rejection = reject_vector(self[word], self.direction)
            update_word_vector(self.model, word, rejection)

        self.model.init_sims(replace=True)
734
735
    def _equalize(self, equality_sets):
        """Equalize the words of every equality set.

        For each set, the normalized word vectors are averaged into
        a center, which ``project_reject_vector`` splits with respect to
        ``self.direction``. Every word of the set then receives
        the rejected part of the center plus a scaled, normalized offset
        of its own projection from the projected center.
        Afterwards ``self.model.init_sims(replace=True)`` is called.

        In verbose mode, a table with the per-word scalars is printed.

        :param equality_sets: Iterable of word collections to equalize.
        """
        # pylint: disable=R0914

        self._is_direction_identified()

        # Rows of the verbose report; filled only when ``self._verbose``.
        report_rows = []

        for set_index, set_words in enumerate(equality_sets):
            unit_vectors = [normalize(self[word]) for word in set_words]
            center = np.mean(unit_vectors, axis=0)
            projected_center, rejected_center = project_reject_vector(
                center, self.direction)
            scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)

            for word, vector in zip(set_words, unit_vectors):
                offset = (project_vector(vector, self.direction)
                          - projected_center)
                projected_part = normalize(offset)

                # This differs from the reference code of Bolukbasi,
                # which uses:
                #   equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # The two behave the same only for equality sets
                # of size 2 (pairs) - not sure! For pairs,
                # projected_part_vector1 == -projected_part_vector2,
                # and this is the same as
                # projected_part_vector1 == self.direction.
                # The form below follows the article.
                equalized_vector = rejected_center + scaling * projected_part

                update_word_vector(self.model, word, equalized_vector)

                if not self._verbose:
                    continue

                report_rows.append({
                    'equality_set_index': set_index,
                    'word': word,
                    'scaling': scaling,
                    'projected_scalar': vector @ self.direction,
                    'equalized_projected_scalar': (equalized_vector
                                                   @ self.direction),
                })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for 1-dim bias space (direction):')
            report_df = pd.DataFrame(report_rows)
            report_df = report_df.set_index(['equality_set_index', 'word'])
            print(tabulate(report_df, headers='keys'))

        self.model.init_sims(replace=True)
788
789
    def _generate_pair_candidates(self, pairs):
        """Build the set of word-form pairs that exist in the model.

        The forms produced by ``generate_one_word_forms`` for the two
        words of each pair are matched up in order; a matched couple
        is kept only if both of its forms are in ``self.model``.

        :param pairs: Iterable of two-word pairs.
        :return: Set of ``(form1, form2)`` tuples.
        """
        candidates = set()

        for first_word, second_word in pairs:
            form_couples = zip(generate_one_word_forms(first_word),
                               generate_one_word_forms(second_word))

            for first_form, second_form in form_couples:
                if first_form not in self.model:
                    continue
                if second_form not in self.model:
                    continue
                candidates.add((first_form, second_form))

        return candidates
796
797
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding.

        :param str method: The method of debiasing.
                           Must be one of ``DEBIAS_METHODS``.
        :param list neutral_words: List of neutral words
                                   for the neutralize step
        :param list equality_sets: List of equality sets,
                                   for the equalize step.
                                   The sets represent the direction.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one
        :return: ``None`` if `inplace` is true,
                 otherwise the debiased copy.
        :raises ValueError: If `method` is not one of ``DEBIAS_METHODS``.

        .. warning::

          After calling `debias`,
          all the vectors of the word embedding
          will be normalized to unit length.

        """

        # pylint: disable=W0212

        # All the validation happens up front: before the deep copy
        # (which may be expensive) and before any vector is modified,
        # so an invalid call never leaves the embedding half-debiased.
        if method not in DEBIAS_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DEBIAS_METHODS, method))

        if method == 'hard':
            assert all(len(equality_set) == 2
                       for equality_set in equality_sets), \
                   'Currently supporting only equality pairs.'

        bias_word_embedding = self if inplace else copy.deepcopy(self)

        if method in ['hard', 'neutralize']:
            if self._verbose:
                print('Neutralize...')
            bias_word_embedding._neutralize(neutral_words)

        if method == 'hard':
            if self._verbose:
                print('Equalize...')

            # Expand the pairs to the word forms that exist in the model.
            equality_sets = self._generate_pair_candidates(equality_sets)

            bias_word_embedding._equalize(equality_sets)

        return None if inplace else bias_word_embedding
849
850
    def evaluate_word_embedding(self,
                                kwargs_word_pairs=None,
                                kwargs_word_analogies=None):
        """
        Evaluate word pairs tasks and word analogies tasks.

        The evaluation is done on ``self.model``.

        :param kwargs_word_pairs: Kwargs for
                                  evaluate_word_pairs
                                  method.
        :type kwargs_word_pairs: dict or None
        :param kwargs_word_analogies: Kwargs for
                                      evaluate_word_analogies
                                      method.
        :type kwargs_word_analogies: dict or None
        :return: Tuple of :class:`pandas.DataFrame`
                 for the evaluation results.
        """

        evaluation_results = evaluate_word_embedding(self.model,
                                                     kwargs_word_pairs,
                                                     kwargs_word_analogies)
        return evaluation_results
872
873
    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific words.

        Using Linear SVM.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training.
                                              Defaults to
                                              ``MAX_NON_SPECIFIC_EXAMPLES``.
        :param debug: If true, the training data is returned as well.
        :type debug: bool or None
        :return: List of learned specific words and the classifier object.
                 If `debug` is true, the training matrix ``X`` and
                 the labels ``y`` are appended to the returned tuple.
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        # Built once, so the membership test for every vocabulary word
        # is a hash lookup rather than a scan over the seed list.
        seed_lookup = frozenset(seed_specific_words)

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_lookup

            if not is_specific:
                # Only the first `max_non_specific_examples` non-specific
                # words (in vocabulary order) are used as negative examples.
                non_specific_example_count += 1
                if non_specific_example_count > max_non_specific_examples:
                    continue

            data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        # Rows are scaled to unit length before training.
        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        # Classify every word of the vocabulary with the trained model.
        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y
929
930
    def _plot_most_biased_one_cluster(self,
                                      most_biased_neutral_words, y_bias,
                                      random_state=1, ax=None):
        """Plot the clustering of the given words for this embedding.

        The vectors of the words are looked up in ``self.model`` and
        passed, with the labels, to ``plot_clustering_as_classification``.

        :param most_biased_neutral_words: Iterable of words to cluster.
        :param y_bias: The labels that go along with the words.
        :param random_state: Forwarded to the clustering plot.
        :param ax: Forwarded to the clustering plot.
        :return: The result of ``plot_clustering_as_classification``.
        """
        word_vectors = []
        for biased_word in most_biased_neutral_words:
            word_vectors.append(self.model[biased_word])

        clustering_result = plot_clustering_as_classification(
            word_vectors,
            y_bias,
            random_state=random_state,
            ax=ax)

        return clustering_result
940
941
    def compute_factual_association(self, factual_properity):
        """Compute association of a factual property to the projection.

        Inspired by WEFAT (Word-Embedding Factual Association Test),
        but it is not the same:
        - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
        `Semantics derived automatically
        from language corpora contain human-like biases
        <http://opus.bath.ac.uk/55288/>`_.
        Science, 356(6334), 183-186.

        In a future version, the WEFAT will also be implemented.

        If a word doesn't exist in the word embedding,
        then it will be filtered out.

        For example, in :class:`ethically.we.bias.GenderBiasWE`,
        the default factual property is the percentage of female
        in various occupations
        from the Labor Force Statistics of 2017 Population Survey,
        Taken from: https://arxiv.org/abs/1804.06876

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :return: Pearson r, pvalue and the words with their
                 associated factual values
                 and their projection on the bias direction.
        :raises ValueError: If none of the words exists
                            in the word embedding.
        """

        points = {word: (value, self.project_on_direction(word))
                  for word, value in factual_properity.items()
                  if word in self.model}

        # Without this check, the unpacking below fails
        # with an unrelated "not enough values to unpack" message.
        if not points:
            raise ValueError('None of the words of the factual property'
                             ' exists in the word embedding.')

        x, y = zip(*points.values())

        return pearsonr(x, y), points
977
978
    def plot_factual_association(self, factual_properity, ax=None):
        """Plot association of a factual property to the projection.

        See: :meth:`BiasWordEmbedding.compute_factual_association`

        :param dict factual_properity: Dictionary of words
                                       and their factual values.
        :param ax: Axes object to draw on.
                   If ``None``, a new figure is created.
        :return: The axes object of the plot.
        """

        result = self.compute_factual_association(factual_properity)

        (r, pvalue), points = result
        x, y = zip(*points.values())

        if ax is None:
            _, ax = plt.subplots(1)

        ax.scatter(x, y)

        # The title and the labels are set on `ax` itself, and not through
        # `plt`, which would decorate the *current* axes - not necessarily
        # the one that was given by the caller.
        ax.set_title('Association between Factual Property '
                     'and Projection on Direction '
                     '(Pearson R = {:0.2f} ; pvalue={:0.2f})'
                     .format(r, pvalue))

        ax.set_xlabel('Factual Property')
        ax.set_ylabel('Projection on Direction')

        return ax
1006
1007
    @staticmethod
    def plot_most_biased_clustering(biased, debiased,
                                    seed='ends', n_extreme=500,
                                    random_state=1):
        """Plot clustering as classification of biased neutral words.

        The neutral words of `biased` are ranked by the projection of
        their normalized vectors on the seed vector; the `n_extreme` words
        of each end of the ranking are clustered and plotted twice:
        once with the biased vectors and once with the debiased ones.

        :param biased: Biased word embedding of
                       :class:`~ethically.we.bias.BiasWordEmbedding`.
        :param debiased: Debiased word embedding of
                         :class:`~ethically.we.bias.BiasWordEmbedding`.
        :param seed: The definition of the seed vector.
                    Either by a tuple of two word ends,
                    or by `'ends'` for the pre-defined ends
                    or by `'direction'` for
                    the pre-defined direction vector.
        :param n_extreme: The number of extreme biased
                          neutral words to use.
        :param random_state: Forwarded to the clustering of both plots.
        :return: Tuple of list of ax objects of the plot,
                 and a dictionary with the most positive
                 and negative words.

        Based on:

        - Gonen, H., & Goldberg, Y. (2019).
          `Lipstick on a Pig:
          Debiasing Methods Cover up Systematic Gender Biases
          in Word Embeddings But do not Remove
          Them <https://arxiv.org/abs/1903.03862>`_.
          arXiv preprint arXiv:1903.03862.

        - https://github.com/gonenhila/gender_bias_lipstick
        """
        # pylint: disable=protected-access,too-many-locals,line-too-long

        assert biased.positive_end == debiased.positive_end, \
            'Postive ends should be the same.'
        assert biased.negative_end == debiased.negative_end, \
            'Negative ends should be the same.'

        seed_vector, _, _ = get_seed_vector(seed, biased)

        # (projection, word) tuples, in ascending order of projection.
        scored_words = sorted(
            (normalize(biased[neutral_word]) @ seed_vector, neutral_word)
            for neutral_word in biased._data['neutral_words'])

        _, most_negative_words = zip(*scored_words[:n_extreme])
        _, most_positive_words = zip(*scored_words[-n_extreme:])

        most_biased_neutral_words = most_negative_words + most_positive_words

        # Labels: False for the negative end, True for the positive end.
        y_bias = [False] * n_extreme + [True] * n_extreme

        _, axes = plt.subplots(1, 2, figsize=(20, 5))

        panels = ((biased, 'Biased - Accuracy={}'),
                  (debiased, 'Debiased - Accuracy={}'))

        for panel_ax, (embedding, title_template) in zip(axes, panels):
            accuracy = embedding._plot_most_biased_one_cluster(
                most_biased_neutral_words,
                y_bias,
                random_state=random_state,
                ax=panel_ax)
            panel_ax.set_title(title_template.format(accuracy))

        extreme_words = {biased.positive_end: most_positive_words,
                         biased.negative_end: most_negative_words}

        return axes, extreme_words
1080
1081
1082
class GenderBiasWE(BiasWordEmbedding):
1083
    """Measure and adjust the Gender Bias in English Word Embedding.
1084
1085
    :param model: Word embedding model of ``gensim.model.KeyedVectors``
1086
    :param bool only_lower: Whether the word embedding contains
1087
                            only lower case words
1088
    :param bool verbose: Set verbosity
1089
    :param str identify_direction: Set the method of identifying
1090
                                   the gender direction:
1091
                                   `'single'`, `'sum'` or `'pca'`.
1092
    :param bool to_normalize: Whether to normalize all the vectors
1093
                              (recommended!)
1094
    """
1095
1096
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction='pca', to_normalize=True):
        """Initialize the data and, optionally, identify the direction.

        :param model: Word embedding model.
        :param bool only_lower: Forwarded to the parent class.
        :param bool verbose: Forwarded to the parent class.
        :param identify_direction: `'single'`, `'sum'` or `'pca'`;
                                   a falsy value skips the identification
                                   of the direction.
        :param bool to_normalize: Forwarded to the parent class.
        """
        # `to_normalize` is forwarded as given; it used to be
        # hard-coded to True, so the argument was silently ignored.
        super().__init__(model=model,
                         only_lower=only_lower,
                         verbose=verbose,
                         to_normalize=to_normalize)
        self._initialize_data()

        if identify_direction:
            definitional = None

            if identify_direction == 'single':
                definitional = ('she', 'he')
            elif identify_direction == 'sum':
                # Transposes the pairs into two groups of words.
                definitional = zip(*self._data['definitional_pairs'])
            elif identify_direction == 'pca':
                definitional = self._data['definitional_pairs']

            self._identify_direction('she', 'he',
                                     definitional,
                                     identify_direction)
1117
1118
    def _initialize_data(self):
        """Prepare a private copy of the Bolukbasi gender data.

        The word groups are filtered by the model,
        and a sorted ``'neutral_words'`` group is derived
        and registered in ``'word_group_keys'``.
        """
        specific_key = 'specific_full_with_definitional_equalize'

        # A deep copy, so the module-level data is never modified.
        self._data = copy.deepcopy(BOLUKBASI_DATA['gender'])

        if not self.only_lower:
            expanded_specific = generate_words_forms(self._data[specific_key])
            self._data[specific_key] = expanded_specific

        for group_key in self._data['word_group_keys']:
            filtered_group = self._filter_words_by_model(
                self._data[group_key])
            self._data[group_key] = filtered_group

        neutral_words = self._extract_neutral_words(self._data[specific_key])
        self._data['neutral_words'] = neutral_words
        self._data['neutral_words'].sort()
        self._data['word_group_keys'].append('neutral_words')
1134
1135
    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scores; by default, of the profession names.

        :param words: Either the words to plot, or `'professions'`
                      for ``self._data['profession_names']``.
        :param n_extreme: Forwarded to the parent method.
        :param ax: Forwarded to the parent method.
        :param axis_projection_step: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        # The type is checked first, so an array-like `words` is never
        # compared to a string (which is ambiguous as a condition).
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)
1142
1143
    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        """Plot the projections of word groups; by default, Bolukbasi's.

        :param word_groups: Either the word groups to plot,
                            or `'bolukbasi'` for all the groups that
                            are listed in ``self._data['word_group_keys']``.
        :param ax: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        use_default_groups = word_groups == 'bolukbasi'

        if use_default_groups:
            word_groups = {}
            for group_key in self._data['word_group_keys']:
                word_groups[group_key] = self._data[group_key]

        return super().plot_dist_projections_on_direction(word_groups, ax)
1150
1151
    @classmethod
    def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                         ax=None, scatter_kwargs=None):
        """Plot the bias across word embeddings for the neutral professions.

        The words are ``BOLUKBASI_DATA['gender']['neutral_profession_names']``.

        :param word_embedding_bias_dict: Forwarded to the parent method.
        :param ax: Forwarded to the parent method.
        :param scatter_kwargs: Forwarded to the parent method.
        """
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']

        # Zero-argument `super()` starts the lookup after *this* class.
        # `super(cls, cls)` starts it after `cls`, so calling the method
        # on a subclass would resolve back to this method and recurse
        # without end.
        # NOTE(review): the result of the parent method is discarded,
        # as before - confirm whether it should be returned.
        super().plot_bias_across_word_embeddings(word_embedding_bias_dict,
                                                 words,
                                                 ax,
                                                 scatter_kwargs)
1161
1162
    def calc_direct_bias(self, neutral_words='professions', c=None):
        """Calculate the direct bias; by default, of neutral professions.

        :param neutral_words: Either the neutral words to use,
                              or `'professions'` for
                              ``self._data['neutral_profession_names']``.
        :param c: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            neutral_words = self._data['neutral_profession_names']

        # `c` is forwarded for custom words as well; it used to be
        # dropped whenever `neutral_words` was given explicitly.
        return super().calc_direct_bias(neutral_words, c)
1168
1169
    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        """Generate the closest words and their indirect bias.

        By default, the words are the profession names.

        :param neutral_positive_end: Forwarded to the parent method.
        :param neutral_negative_end: Forwarded to the parent method.
        :param words: Either the words to use, or `'professions'`
                      for ``self._data['profession_names']``.
        :param n_extreme: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        # pylint: disable=C0301

        # The type is checked first, so an array-like `words` is never
        # compared to a string (which is ambiguous as a condition).
        if isinstance(words, str) and words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                            neutral_negative_end,
                                                            words,
                                                            n_extreme=n_extreme)
1182
1183
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the word embedding, with the gender data as defaults.

        :param str method: The method of debiasing.
        :param neutral_words: Words for the neutralize step.
                              If ``None`` (and the method neutralizes),
                              ``self._data['neutral_words']`` is used.
        :param equality_sets: Equality sets for the equalize step.
                              If ``None`` (and the method is `'hard'`),
                              the union of the equalize pairs and
                              the definitional pairs is used.
        :param bool inplace: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        # pylint: disable=line-too-long
        neutralizes = method in ('hard', 'neutralize')

        if neutralizes and neutral_words is None:
            neutral_words = self._data['neutral_words']

        if method == 'hard' and equality_sets is None:
            pair_groups = (self._data['equalize_pairs'],
                           self._data['definitional_pairs'])
            equality_sets = {tuple(pair)
                             for pair_group in pair_groups
                             for pair in pair_group}

        return super().debias(method, neutral_words, equality_sets,
                              inplace)
1196
1197
    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        """Learn specific words; by default, from Bolukbasi's seed words.

        :param seed_specific_words: Either the seed specific words,
                                    or `'bolukbasi'` for
                                    ``self._data['specific_seed']``.
        :param max_non_specific_examples: Forwarded to the parent method.
        :param debug: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        # The type is checked first, so an array-like collection of seed
        # words is never compared to a string (which is ambiguous
        # as a condition).
        if (isinstance(seed_specific_words, str)
                and seed_specific_words == 'bolukbasi'):
            seed_specific_words = self._data['specific_seed']

        return super().learn_full_specific_words(seed_specific_words,
                                                 max_non_specific_examples,
                                                 debug)
1206
1207
    def compute_factual_association(self,
                                    factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long
        """Compute association of a factual property to the projection.

        :param dict factual_properity: Dictionary of words and their
                                       factual values. Defaults to
                                       ``OCCUPATION_FEMALE_PRECENTAGE``.
        :return: The result of the parent method.
        """
        association = super().compute_factual_association(factual_properity)
        return association
1210
1211
    def plot_factual_association(self,
                                 factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long
                                 ax=None):
        """Plot association of a factual property to the projection.

        :param dict factual_properity: Dictionary of words and their
                                       factual values. Defaults to
                                       ``OCCUPATION_FEMALE_PRECENTAGE``.
        :param ax: Forwarded to the parent method.
        :return: The result of the parent method.
        """
        plot_result = super().plot_factual_association(factual_properity, ax)
        return plot_result
1215