"""
Measuring and adjusting bias in words embedding by Bolukbasi (2016).

References:
    - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V.,
      & Kalai, A. T. (2016).
      `Man is to computer programmer as woman is to homemaker?
      debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_.
      In Advances in neural information processing systems
      (pp. 4349-4357).

    - The code and data are based on the GitHub repository:
      https://github.com/tolga-b/debiaswe (MIT License).


Usage
~~~~~

.. code:: python

   >>> from ethically.we import GenderBiasWE
   >>> from gensim import downloader
   >>> w2v_model = downloader.load('word2vec-google-news-300')
   >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
   >>> w2v_gender_bias_we.calc_direct_bias()
   0.07307904249481942
   >>> w2v_gender_bias_we.debias()
   >>> w2v_gender_bias_we.calc_direct_bias()
   1.7964246601064155e-09

Types of Bias
~~~~~~~~~~~~~

Direct Bias
^^^^^^^^^^^

1. Associations
    Words that are closer to one end (e.g., *he*) than to
    the other end (*she*).
    For example, occupational stereotypes (page 7).
    Calculated by
    :meth:`~ethically.we.bias.BiasWordsEmbedding.calc_direct_bias`.

2. Analogies
    Analogies of *he:x::she:y*.
    For example, analogies exhibiting stereotypes (page 7).
    Generated by
    :meth:`~ethically.we.bias.BiasWordsEmbedding.generate_analogies`.


Indirect Bias
^^^^^^^^^^^^^

The projection of a neutral word onto a direction defined by
two other neutral words is explained, in large part, by a shared
projection on the bias direction.

Calculated by
:meth:`~ethically.we.bias.BiasWordsEmbedding.calc_indirect_bias`
and
:meth:`~ethically.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`.

"""

import copy

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.svm import LinearSVC
from tqdm import tqdm

from tabulate import tabulate

from ..consts import RANDOM_STATE
from .benchmark import evaluate_words_embedding
from .data import BOLUKBASI_DATA
from .utils import (
    assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms,
    generate_words_forms, normalize, project_params, project_reject_vector,
    project_vector, reject_vector, round_to_extreme,
    take_two_sides_extreme_sorted, update_word_vector,
)


DIRECTION_METHODS = ['single', 'sum', 'pca']
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
FIRST_PC_THRESHOLD = 0.5
MAX_NON_SPECIFIC_EXAMPLES = 1000

__all__ = ['GenderBiasWE', 'BiasWordsEmbedding']


class BiasWordsEmbedding:
    """Measure and adjust a bias in English words embedding.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the words embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False):
        assert_gensim_keyed_vectors(model)

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # identify_direction doesn't have any meaning
        # for the class BiasWordsEmbedding
        if self.__class__ == __class__ and identify_direction is not False:
            raise ValueError('identify_direction must be False'
                             ' for an instance of {}'
                             .format(__class__))

        self.model = model

        # TODO: write unittest for when it is False
        self.only_lower = only_lower

        self._verbose = verbose

        self.direction = None
        self.positive_end = None
        self.negative_end = None

    def __copy__(self):
        bias_words_embedding = self.__class__(self.model,
                                              self.only_lower,
                                              self._verbose,
                                              identify_direction=False)
        bias_words_embedding.direction = copy.deepcopy(self.direction)
        bias_words_embedding.positive_end = copy.deepcopy(self.positive_end)
        bias_words_embedding.negative_end = copy.deepcopy(self.negative_end)
        return bias_words_embedding

    def __deepcopy__(self, memo):
        bias_words_embedding = copy.copy(self)
        bias_words_embedding.model = copy.deepcopy(bias_words_embedding.model)
        return bias_words_embedding

    def __getitem__(self, key):
        return self.model[key]

    def __contains__(self, item):
        return item in self.model

    def _filter_words_by_model(self, words):
        return [word for word in words if word in self]

    def _is_direction_identified(self):
        if self.direction is None:
            raise RuntimeError('The direction was not identified'
                               ' for this {} instance'
                               .format(self.__class__.__name__))

    # There is a mistake in the article
    # it is written (section 5.1):
    # "To identify the gender subspace, we took the ten gender pair difference
    # vectors and computed its principal components (PCs)"
    # however in the source code the pairs' centered vectors are used:
    # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        matrix = []

        for word1, word2 in definitional_pairs:
            vector1 = normalize(self[word1])
            vector2 = normalize(self[word2])

            center = (vector1 + vector2) / 2

            matrix.append(vector1 - center)
            matrix.append(vector2 - center)

        pca = PCA(n_components=n_components)
        pca.fit(matrix)

        if self._verbose:
            table = enumerate(pca.explained_variance_ratio_, start=1)
            headers = ['Principal Component',
                       'Explained Variance Ratio']
            print(tabulate(table, headers=headers))

        return pca

    # TODO: add the SVD method from section 6 step 1
    # It seems there is a mistake there, I think it is the same as PCA
    # just with replacing it with SVD
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end'
                             ' should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The explained variance'
                                   ' of the first principal component should'
                                   ' be at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # the sign of the direction from PCA is arbitrary
            # (we cannot control what the PCA will return) -
            # flip it if it is opposite to positive_end minus negative_end
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end

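    # Note on the expected shape of ``definitional`` above:
    # 'single' takes a single pair of words (e.g., ('she', 'he')),
    # 'sum' takes two groups of words
    # (e.g., (['she', 'her'], ['he', 'his'])),
    # and 'pca' takes a list of definitional pairs,
    # as in _identify_subspace_by_pca.
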
    def project_on_direction(self, word):
        """Project the normalized vector of the word on the direction.

        :param str word: The word to project
        :return float: The projection scalar
        """

        self._is_direction_identified()

        vector = self[word]
        projection_score = self.model.cosine_similarities(self.direction,
                                                          [vector])[0]
        return projection_score

    def _calc_projection_scores(self, words):
        self._is_direction_identified()

        df = pd.DataFrame({'word': words})

        # TODO: maybe using cosine_similarities on all the vectors?
        # it might be faster
        df['projection'] = df['word'].apply(self.project_on_direction)
        df = df.sort_values('projection', ascending=False)

        return df

    def calc_projection_data(self, words):
        """
        Calculate projection, projected and rejected vectors of a words list.

        :param list words: List of words
        :return: :class:`pandas.DataFrame` of the projection,
                 projected and rejected vectors of the words list
        """
        projection_data = []
        for word in words:
            vector = self[word]
            normalized_vector = normalize(vector)

            (projection,
             projected_vector,
             rejected_vector) = project_params(normalized_vector,
                                               self.direction)

            projection_data.append({'word': word,
                                    'vector': vector,
                                    'projection': projection,
                                    'projected_vector': projected_vector,
                                    'rejected_vector': rejected_vector})

        return pd.DataFrame(projection_data)

    def plot_projection_scores(self, words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.

        :param list words: The words to project
        :param int or None n_extreme: The number of extreme words to show
        :return: The ax object of the plot
        """

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection'] = projections_df['projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        cmap = plt.get_cmap('RdBu')
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))

        most_extreme_projection = (projections_df['projection']
                                   .abs()
                                   .max()
                                   .round(1))

        sns.barplot(x='projection', y='word', data=projections_df,
                    palette=projections_df['color'])

        plt.xticks(np.arange(-most_extreme_projection,
                             most_extreme_projection + axis_projection_step,
                             axis_projection_step))
        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))

        plt.xlabel('Direction Projection')
        plt.ylabel('Words')

        return ax

    def plot_dist_projections_on_direction(self, word_groups, ax=None):
        """Plot the projection scalars distribution on the direction.

        :param dict word_groups: The word groups to project
        :return: The ax object of the plot
        """

        if ax is None:
            _, ax = plt.subplots(1)

        names = sorted(word_groups.keys())

        for name in names:
            words = word_groups[name]
            label = '{} (#{})'.format(name, len(words))
            vectors = [self[word] for word in words]
            projections = self.model.cosine_similarities(self.direction,
                                                         vectors)
            sns.distplot(projections, hist=False, label=label, ax=ax)

        plt.axvline(0, color='k', linestyle='--')

        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))
        plt.xlabel('Direction Projection')
        plt.ylabel('Density')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        return ax

    @classmethod
    def _calc_bias_across_words_embeddings(cls,
                                           words_embedding_bias_dict,
                                           words):
        """
        Calculate the projections and Spearman rho of words
        for two words embeddings.

        :param dict words_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :return tuple: Projections and Spearman rho.
        """
        # pylint: disable=W0212
        assert len(words_embedding_bias_dict) == 2, \
            'Supports only two words embeddings'

        intersection_words = [word for word in words
                              if all(word in web
                                     for web in (words_embedding_bias_dict
                                                 .values()))]

        projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301
                       for name, web in words_embedding_bias_dict.items()}

        df = pd.DataFrame(projections)
        df.index = intersection_words

        rho, _ = spearmanr(*df.transpose().values)
        return df, rho

    @classmethod
    def plot_bias_across_words_embeddings(cls, words_embedding_bias_dict,
                                          words, ax=None, scatter_kwargs=None):
        """
        Plot the projections of the same words from two words embeddings.

        :param dict words_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                               as values,
                                               and their names as keys.
        :param list words: Words to be projected.
        :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
        :type scatter_kwargs: dict or None
        :return: The ax object of the plot
        """
        # pylint: disable=W0212

        df, rho = cls._calc_bias_across_words_embeddings(words_embedding_bias_dict,  # pylint: disable=C0301
                                                         words)

        if ax is None:
            _, ax = plt.subplots(1)

        if scatter_kwargs is None:
            scatter_kwargs = {}

        name1, name2 = words_embedding_bias_dict.keys()

        ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

        plt.title('Bias Across Words Embeddings'
                  ' (Spearman Rho = {:0.2f})'.format(rho))

        negative_end = words_embedding_bias_dict[name1].negative_end
        positive_end = words_embedding_bias_dict[name1].positive_end
        plt.xlabel('← {}     {}     {} →'.format(negative_end,
                                                 name1,
                                                 positive_end))
        plt.ylabel('← {}     {}     {} →'.format(negative_end,
                                                 name2,
                                                 positive_end))

        ax_min = round_to_extreme(df.values.min())
        ax_max = round_to_extreme(df.values.max())
        plt.xlim(ax_min, ax_max)
        plt.ylim(ax_min, ax_max)

        return ax

    # TODO: refactor for speed and clarity
    def generate_analogies(self, n_analogies=100, multiple=False,
                           delta=1., restrict_vocab=30000):
        """
        Generate analogies based on the bias direction.

        x - y ~ direction.
        or a:x::b:y when a - b ~ direction.

        ``delta`` is used for semantic coherency.
        The default value of 1 corresponds to an angle <= pi/3.

        :param int n_analogies: Number of analogies to generate.
        :param bool multiple: Whether to allow multiple appearances of a word
                              in the analogies.
        :param float delta: Threshold for semantic similarity.
                            The maximal distance between x and y.
        :param int restrict_vocab: The vocabulary size to use.
        :return: Data Frame of analogies (x, y), their distances,
                 and their cosine similarity scores
        """

        # pylint: disable=C0301,R0914

        self._is_direction_identified()

        restrict_vocab_vectors = self.model.vectors[:restrict_vocab]

        normalized_vectors = (restrict_vocab_vectors
                              / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None])

        pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors)
        pairs_indices = np.array(np.nonzero(
            ((pairs_distances < delta)
             & (pairs_distances != 0)))).T
        x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0)
        y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0)

        x_minus_y_vectors = x_vectors - y_vectors
        normalized_x_minus_y_vectors = (x_minus_y_vectors
                                        / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None])

        cos_distances = normalized_x_minus_y_vectors @ self.direction

        sorted_cos_distances_indices = np.argsort(cos_distances)[::-1]

        sorted_cos_distances_indices_iter = iter(sorted_cos_distances_indices)

        analogies = []
        generated_words_x = set()
        generated_words_y = set()

        while len(analogies) < n_analogies:
            cos_distance_index = next(sorted_cos_distances_indices_iter)
            pair_index = pairs_indices[cos_distance_index]
            word_x, word_y = [self.model.index2word[index]
                              for index in pair_index]

            if multiple or (word_x not in generated_words_x
                            and word_y not in generated_words_y):
                analogies.append({'x': word_x,
                                  'y': word_y,
                                  'score': cos_distances[cos_distance_index],
                                  'distance': pairs_distances[tuple(pair_index)]})
            generated_words_x.add(word_x)
            generated_words_y.add(word_y)

        df = pd.DataFrame(analogies)
        df = df[['x', 'y', 'distance', 'score']]
        return df

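    # Note: for unit vectors x and y, ||x - y||^2 = 2 - 2 * cos(x, y),
    # so requiring ||x - y|| <= delta = 1 means cos(x, y) >= 1/2,
    # i.e., the angle between x and y is at most pi/3 - which is why
    # the default ``delta`` keeps the generated pairs
    # semantically coherent.
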
    def calc_direct_bias(self, neutral_words, c=None):
        """Calculate the direct bias.

        Based on the projection of neutral words on the direction.

        :param list neutral_words: List of neutral words
        :param c: Strictness of bias measuring
        :type c: float or None
        :return: The direct bias
        """

        if c is None:
            c = 1

        projections = self._calc_projection_scores(neutral_words)['projection']
        direct_bias_terms = np.abs(projections) ** c
        direct_bias = direct_bias_terms.sum() / len(neutral_words)

        return direct_bias

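    # The method above computes the direct bias measure
    # from Bolukbasi et al. (2016, section 5.2):
    #
    #     DirectBias_c = (1 / |N|) * sum(|cos(w, g)| ** c for w in N)
    #
    # where N is the set of neutral words, g is the bias direction,
    # and c is the strictness parameter
    # (c = 1 averages the absolute projections).
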
    def calc_indirect_bias(self, word1, word2):
        """Calculate the indirect bias between two words.

        Based on the amount of shared projection of the words
        on the direction.

        Also called PairBias.

        :param str word1: First word
        :param str word2: Second word
        :return: The indirect bias between the two words
        """

        self._is_direction_identified()

        vector1 = normalize(self[word1])
        vector2 = normalize(self[word2])

        perpendicular_vector1 = reject_vector(vector1, self.direction)
        perpendicular_vector2 = reject_vector(vector2, self.direction)

        inner_product = vector1 @ vector2
        perpendicular_similarity = cosine_similarity(perpendicular_vector1,
                                                     perpendicular_vector2)

        indirect_bias = ((inner_product - perpendicular_similarity)
                         / inner_product)
        return indirect_bias

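    # The method above computes the gender component beta
    # from Bolukbasi et al. (2016, section 5.3):
    #
    #     beta(w, v) = (w @ v - cos(w_perp, v_perp)) / (w @ v)
    #
    # where w_perp and v_perp are the parts of the normalized
    # word vectors that are perpendicular to the bias direction.
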
    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words=None, n_extreme=5):
        """
        Generate the closest words to a neutral direction
        and their indirect bias.

        The direction of the neutral words is used to find
        the most extreme words.
        The indirect bias is calculated between the most extreme words
        and the closest end.

        :param str neutral_positive_end: A word that defines the positive side
                                         of the neutral direction.
        :param str neutral_negative_end: A word that defines the negative side
                                         of the neutral direction.
        :param list words: List of words to project on the neutral direction.
        :param int n_extreme: The number of the most extreme words
                              (positive and negative) to show.
        :return: Data Frame of the most extreme words
                 with their projection scores and indirect biases.
        """

        neutral_direction = normalize(self[neutral_positive_end]
                                      - self[neutral_negative_end])

        vectors = [normalize(self[word]) for word in words]
        df = (pd.DataFrame([{'word': word,
                             'projection': vector @ neutral_direction}
                            for word, vector in zip(words, vectors)])
              .sort_values('projection', ascending=False))

        df = take_two_sides_extreme_sorted(df, n_extreme,
                                           'end',
                                           neutral_positive_end,
                                           neutral_negative_end)

        df['indirect_bias'] = df.apply(lambda r:
                                       self.calc_indirect_bias(r['word'],
                                                               r['end']),
                                       axis=1)

        df = df.set_index(['end', 'word'])
        df = df[['projection', 'indirect_bias']]

        return df

    def _extract_neutral_words(self, specific_words):
        extended_specific_words = set()

        # because the specific_full data was trained
        # on a partial words embedding
        for word in specific_words:
            extended_specific_words.add(word)
            extended_specific_words.add(word.lower())
            extended_specific_words.add(word.upper())
            extended_specific_words.add(word.title())

        neutral_words = [word for word in self.model.vocab
                         if word not in extended_specific_words]

        return neutral_words

    def _neutralize(self, neutral_words):
        self._is_direction_identified()

        if self._verbose:
            neutral_words_iter = tqdm(neutral_words)
        else:
            neutral_words_iter = iter(neutral_words)

        for word in neutral_words_iter:
            neutralized_vector = reject_vector(self[word],
                                               self.direction)
            update_word_vector(self.model, word, neutralized_vector)

        self.model.init_sims(replace=True)

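    # The neutralize step above re-embeds every neutral word w
    # following Bolukbasi et al. (2016, section 6):
    #
    #     w := w - w_B
    #
    # where w_B is the projection of w on the bias direction;
    # the final init_sims(replace=True) call then re-normalizes
    # all the vectors to unit length.
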
    def _equalize(self, equality_sets):
        # pylint: disable=R0914

        self._is_direction_identified()

        if self._verbose:
            words_data = []

        for equality_set_index, equality_set_words in enumerate(equality_sets):
            equality_set_vectors = [normalize(self[word])
                                    for word in equality_set_words]
            center = np.mean(equality_set_vectors, axis=0)
            (projected_center,
             rejected_center) = project_reject_vector(center,
                                                      self.direction)
            scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)

            for word, vector in zip(equality_set_words, equality_set_vectors):
                projected_vector = project_vector(vector, self.direction)

                projected_part = normalize(projected_vector - projected_center)

                # This differs from Bolukbasi's code,
                # which behaves the same only for equality sets
                # of size 2 (pairs) - not sure!
                # However, this code matches the equation in the article:
                # equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # For pairs, projected_part_vector1 == -projected_part_vector2,
                # and this is the same as
                # projected_part_vector1 == self.direction
                equalized_vector = rejected_center + scaling * projected_part

                update_word_vector(self.model, word, equalized_vector)

                if self._verbose:
                    words_data.append({
                        'equality_set_index': equality_set_index,
                        'word': word,
                        'scaling': scaling,
                        'projected_scalar': vector @ self.direction,
                        'equalized_projected_scalar': (equalized_vector
                                                       @ self.direction),
                    })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for a 1-dim bias space (direction)):')
            words_data_df = (pd.DataFrame(words_data)
                             .set_index(['equality_set_index', 'word']))
            print(tabulate(words_data_df, headers='keys'))

        self.model.init_sims(replace=True)

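    # The equalize step above re-embeds every word w in an equality
    # set E with mean mu, following Bolukbasi et al. (2016, section 6):
    #
    #     nu = mu - mu_B
    #     w := nu + sqrt(1 - ||nu||^2) * (w_B - mu_B) / ||w_B - mu_B||
    #
    # which enforces that any neutral word is equidistant
    # to all the words in E.
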
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        """Debias the words embedding.

        :param str method: The method of debiasing.
        :param list neutral_words: List of neutral words
                                   for the neutralize step
        :param list equality_sets: List of equality sets,
                                   for the equalize step.
                                   The sets represent the direction.
        :param bool inplace: Whether to debias the object inplace
                             or return a new one

        .. warning::

          After calling `debias`,
          all the vectors of the words embedding
          will be normalized to unit length.

        """

        # pylint: disable=W0212
        if inplace:
            bias_words_embedding = self
        else:
            bias_words_embedding = copy.deepcopy(self)

        if method not in DEBIAS_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DEBIAS_METHODS, method))

        if method in ['hard', 'neutralize']:
            if self._verbose:
                print('Neutralize...')
            bias_words_embedding._neutralize(neutral_words)

        if method == 'hard':
            if self._verbose:
                print('Equalize...')
            bias_words_embedding._equalize(equality_sets)

        if inplace:
            return None
        else:
            return bias_words_embedding

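    # A minimal usage sketch with custom arguments
    # (hypothetical words; assumes a loaded model and that a direction
    # was already identified, e.g. by the GenderBiasWE subclass):
    #
    #     >>> bias_we = GenderBiasWE(w2v_model)
    #     >>> bias_we.debias(method='neutralize',
    #     ...                neutral_words=['nurse', 'engineer'])
    #
    # Note that 'soft' passes the method validation,
    # but currently triggers neither the neutralize
    # nor the equalize step.
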
    def evaluate_words_embedding(self,
                                 kwargs_word_pairs=None,
                                 kwargs_word_analogies=None):
        """
        Evaluate word pairs tasks and word analogies tasks.

        :param kwargs_word_pairs: Kwargs for
                                  evaluate_word_pairs
                                  method.
        :type kwargs_word_pairs: dict or None
        :param kwargs_word_analogies: Kwargs for
                                      evaluate_word_analogies
                                      method.
        :type kwargs_word_analogies: dict or None
        :return: Tuple of :class:`pandas.DataFrame`
                 for the evaluation results.
        """

        return evaluate_words_embedding(self.model,
                                        kwargs_word_pairs,
                                        kwargs_word_analogies)

    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific words.

        Using Linear SVM.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training
        :return: List of learned specific words and the classifier object
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_specific_words

            if not is_specific:
                non_specific_example_count += 1
                if non_specific_example_count <= max_non_specific_examples:
                    data.append((self[word], is_specific))
            else:
                data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y


class GenderBiasWE(BiasWordsEmbedding):
    """Measure and adjust the Gender Bias in English Words Embedding.

    :param model: Words embedding model of ``gensim.model.KeyedVectors``
    :param bool only_lower: Whether the words embedding contains
                            only lower case words
    :param bool verbose: Set verbosity
    """

    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=True):
        super().__init__(model, only_lower, verbose)
        self._initialize_data()
        if identify_direction:
            self._identify_direction('she', 'he',
                                     self._data['definitional_pairs'],
                                     'pca')

    def _initialize_data(self):
        self._data = copy.deepcopy(BOLUKBASI_DATA['gender'])

        if not self.only_lower:
            self._data['specific_full_with_definitional'] = \
                generate_words_forms(self
                                     ._data['specific_full_with_definitional'])  # pylint: disable=C0301

        for key in self._data['word_group_keys']:
            self._data[key] = (self._filter_words_by_model(self
                                                           ._data[key]))

        self._data['neutral_words'] = self._extract_neutral_words(self
                                                                  ._data['specific_full_with_definitional'])  # pylint: disable=C0301
        self._data['neutral_words'].sort()
        self._data['word_group_keys'].append('neutral_words')

    def plot_projection_scores(self, words='professions', n_extreme=10,
                               ax=None, axis_projection_step=None):
        if words == 'professions':
            words = self._data['profession_names']

        return super().plot_projection_scores(words, n_extreme,
                                              ax, axis_projection_step)

    def plot_dist_projections_on_direction(self, word_groups='bolukbasi',
                                           ax=None):
        if word_groups == 'bolukbasi':
            word_groups = {key: self._data[key]
                           for key in self._data['word_group_keys']}

        return super().plot_dist_projections_on_direction(word_groups, ax)

    @classmethod
    def plot_bias_across_words_embeddings(cls, words_embedding_bias_dict,
                                          ax=None, scatter_kwargs=None):
        # pylint: disable=W0221
        words = BOLUKBASI_DATA['gender']['neutral_profession_names']
        # a zero-argument super() resolves correctly
        # for classmethods in Python 3
        return super().plot_bias_across_words_embeddings(words_embedding_bias_dict,  # pylint: disable=C0301
                                                         words,
                                                         ax,
                                                         scatter_kwargs)

    def calc_direct_bias(self, neutral_words='professions', c=None):
        if isinstance(neutral_words, str) and neutral_words == 'professions':
            return super().calc_direct_bias(
                self._data['neutral_profession_names'], c)
        else:
            return super().calc_direct_bias(neutral_words, c)

    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words='professions', n_extreme=5):
        # pylint: disable=C0301

        if words == 'professions':
            words = self._data['profession_names']

        return super().generate_closest_words_indirect_bias(neutral_positive_end,
                                                            neutral_negative_end,
                                                            words,
                                                            n_extreme=n_extreme)

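    # A minimal usage sketch, following the softball-football
    # example of Bolukbasi et al. (2016, page 7),
    # assuming a loaded model as in the module docstring:
    #
    #     >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
    #     >>> w2v_gender_bias_we.generate_closest_words_indirect_bias(
    #     ...     'softball', 'football')
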
    def debias(self, method='hard', neutral_words=None, equality_sets=None,
               inplace=True):
        # pylint: disable=C0301
        if method in ['hard', 'neutralize']:
            if neutral_words is None:
                neutral_words = self._data['neutral_words']

        if method == 'hard' and equality_sets is None:
            equality_sets = self._data['definitional_pairs']

            if not self.only_lower:
                assert all(len(equality_set) == 2
                           for equality_set in equality_sets), \
                    'Currently supporting only equality pairs' \
                    ' if only_lower is False'
                # TODO: refactor
                equality_sets = {(candidate1, candidate2)
                                 for word1, word2 in equality_sets
                                 for candidate1, candidate2 in zip(generate_one_word_forms(word1),
                                                                   generate_one_word_forms(word2))}

        return super().debias(method, neutral_words, equality_sets,
                              inplace)

    def learn_full_specific_words(self, seed_specific_words='bolukbasi',
                                  max_non_specific_examples=None,
                                  debug=None):
        if seed_specific_words == 'bolukbasi':
            seed_specific_words = self._data['specific_seed']

        return super().learn_full_specific_words(seed_specific_words,
                                                 max_non_specific_examples,
                                                 debug)
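
    # A minimal usage sketch, assuming a loaded model
    # as in the module docstring; it returns the learned
    # gender-specific words and the fitted LinearSVC classifier:
    #
    #     >>> w2v_gender_bias_we = GenderBiasWE(w2v_model)
    #     >>> specific, clf = w2v_gender_bias_we.learn_full_specific_words()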