| 1 |  |  | # pylint: disable=too-many-lines | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | Measuring and adjusting bias in word embeddings, following Bolukbasi et al. (2016). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | References: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |     - Bolukbasi, T., Chang, K. W., Zou, J. Y., Saligrama, V., | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |       & Kalai, A. T. (2016). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |       `Man is to computer programmer as woman is to homemaker? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |       debiasing word embeddings <https://arxiv.org/abs/1607.06520>`_. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |       In Advances in neural information processing systems | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |       (pp. 4349-4357). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |     - The code and data are based on the GitHub repository: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |       https://github.com/tolga-b/debiaswe (MIT License). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |     - Gonen, H., & Goldberg, Y. (2019). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |       `Lipstick on a Pig: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |       Debiasing Methods Cover up Systematic Gender Biases | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |       in Word Embeddings But do not Remove Them | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |       <https://arxiv.org/abs/1903.03862>`_. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |       arXiv preprint arXiv:1903.03862. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     - Nissim, M., van Noord, R., van der Goot, R. (2019). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |       `Fair is Better than Sensational: Man is to Doctor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |       as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  | Usage | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  | ~~~~~ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  | .. code:: python | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |    >>> from ethically.we import GenderBiasWE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |    >>> from gensim import downloader | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |    >>> w2v_model = downloader.load('word2vec-google-news-300') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |    >>> w2v_gender_bias_we = GenderBiasWE(w2v_model) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |    >>> w2v_gender_bias_we.calc_direct_bias() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |    0.07307904249481942 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |    >>> w2v_gender_bias_we.debias() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |    >>> w2v_gender_bias_we.calc_direct_bias() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |    1.7964246601064155e-09 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  | Types of Bias | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  | ~~~~~~~~~~~~~ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  | Direct Bias | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  | ^^^^^^^^^^^ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  | 1. Associations | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     Words that are closer to one end (e.g., *he*) than to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     the other end (*she*). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     For example, occupational stereotypes (page 7). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |     Calculated by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |     :meth:`~ethically.we.bias.BiasWordEmbedding.calc_direct_bias`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  | 2. Analogies | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |     Analogies of *he:x::she:y*. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     For example, analogies exhibiting stereotypes (page 7). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     Generated by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     :meth:`~ethically.we.bias.BiasWordEmbedding.generate_analogies`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  | Indirect Bias | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  | ^^^^^^^^^^^^^ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  | The projection of a neutral word onto the direction defined by two neutral words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  | is explained in large part by a shared projection onto the bias direction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  | Calculated by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  | :meth:`~ethically.we.bias.BiasWordEmbedding.calc_indirect_bias` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  | and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  | :meth:`~ethically.we.bias.GenderBiasWE.generate_closest_words_indirect_bias`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  | import copy | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  | import warnings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  | import matplotlib.pylab as plt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  | import numpy as np | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  | import pandas as pd | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  | import seaborn as sns | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  | from scipy.stats import pearsonr, spearmanr | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  | from sklearn.decomposition import PCA | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  | from sklearn.metrics.pairwise import euclidean_distances | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  | from sklearn.svm import LinearSVC | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  | from tqdm import tqdm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  | from ethically.consts import RANDOM_STATE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  | from ethically.utils import _warning_setup | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  | from ethically.we.benchmark import evaluate_word_embedding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  | from ethically.we.data import BOLUKBASI_DATA, OCCUPATION_FEMALE_PRECENTAGE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  | from ethically.we.utils import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |     assert_gensim_keyed_vectors, cosine_similarity, generate_one_word_forms, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |     generate_words_forms, get_seed_vector, most_similar, normalize, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |     plot_clustering_as_classification, project_params, project_reject_vector, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |     project_vector, reject_vector, round_to_extreme, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |     take_two_sides_extreme_sorted, update_word_vector, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  | ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  | from tabulate import tabulate | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                # Names accepted for selecting how a bias direction is identified.
                # NOTE(review): the consumers of this list are outside this view --
                # presumably the class below validates a ``method`` argument against it.
                DIRECTION_METHODS = [
                    'single',
                    'sum',
                    'pca',
                ]

                # Names accepted for selecting a debiasing procedure.
                # NOTE(review): usage is not visible here -- confirm against the callers.
                DEBIAS_METHODS = [
                    'neutralize',
                    'hard',
                    'soft',
                ]

                # NOTE(review): presumably a cut-off applied to the first principal
                # component (e.g. its explained-variance share) -- confirm against usage.
                FIRST_PC_THRESHOLD = 0.5

                # NOTE(review): presumably an upper bound on how many non-specific
                # examples are sampled -- confirm against usage.
                MAX_NON_SPECIFIC_EXAMPLES = 1000

                # Public names exported by ``from <module> import *``.
                __all__ = [
                    'GenderBiasWE',
                    'BiasWordEmbedding',
                ]

                # Executed once at import time; the helper is imported from
                # ``ethically.utils`` and its effect is defined there.
                _warning_setup()
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  | class BiasWordEmbedding: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |     """Measure and adjust a bias in English word embedding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |     :param model: Word embedding model of ``gensim.models.KeyedVectors`` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |     :param bool only_lower: Whether the word embedding contains | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |                             only lower case words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |     :param bool verbose: Set verbosity | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |     :param bool to_normalize: Whether to normalize all the vectors | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |                               (recommended!) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def __init__(self, model, only_lower=False, verbose=False,
                             identify_direction=False, to_normalize=True):
                    """Wrap ``model`` and reset the bias direction state.

                    :param model: Word embedding model; it is passed to
                                  ``assert_gensim_keyed_vectors`` first
                    :param bool only_lower: Stored as ``self.only_lower``
                    :param bool verbose: Stored as ``self._verbose``
                    :param identify_direction: Must be ``False`` when this
                                               exact class is instantiated;
                                               the check is skipped for
                                               subclasses
                    :param bool to_normalize: If true, call
                                              ``init_sims(replace=True)``
                                              on the model
                    :raises ValueError: If this exact class is instantiated
                                        with ``identify_direction`` other
                                        than ``False``
                    """
                    assert_gensim_keyed_vectors(model)

                    # TODO: this is bad Python, ask someone about it
                    # probably should be a better design
                    # identify_direction doesn't have any meaning
                    # for the class BiasWordEmbedding
                    is_base_class = self.__class__ == __class__
                    if is_base_class and identify_direction is not False:
                        raise ValueError(
                            'identify_direction must be False'
                            ' for an instance of {}'.format(__class__))

                    self.model = model

                    # TODO: write unit test for when it is False
                    self.only_lower = only_lower
                    self._verbose = verbose

                    # No direction is known until one is identified.
                    self.direction = None
                    self.positive_end = self.negative_end = None

                    if to_normalize:
                        self.model.init_sims(replace=True)
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |     def __copy__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |         bias_word_embedding = self.__class__(self.model, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |                                              self.only_lower, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |                                              self._verbose, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |                                              identify_direction=False) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |         bias_word_embedding.direction = copy.deepcopy(self.direction) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |         bias_word_embedding.positive_end = copy.deepcopy(self.positive_end) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |         bias_word_embedding.negative_end = copy.deepcopy(self.negative_end) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |         return bias_word_embedding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |     def __deepcopy__(self, memo): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         bias_word_embedding = copy.copy(self) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |         return bias_word_embedding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |     def __getitem__(self, key): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |         return self.model[key] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |     def __contains__(self, item): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |         return item in self.model | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |     def _filter_words_by_model(self, words): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |         return [word for word in words if word in self] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |     def _is_direction_identified(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |         if self.direction is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |             raise RuntimeError('The direction was not identified' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |                                ' for this {} instance' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |                                .format(self.__class__.__name__)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |     # There is a mistake in the article | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |     # it is written (section 5.1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |     # "To identify the gender subspace, we took the ten gender pair difference | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |     # vectors and computed its principal components (PCs)" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |     # however in the source code: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |     # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/we.py#L235-L245 | 
            
                                                                                                            
                            
            
                                    
            
            
                def _identify_subspace_by_pca(self, definitional_pairs,
                                              n_components):
                    """Fit a PCA on the pair-centered definitional vectors.

                    For every pair, both word vectors are looked up through
                    ``self[...]``, passed through ``normalize`` and expressed
                    relative to the midpoint of the pair. The PCA is fitted
                    on all of these centered vectors.

                    :param definitional_pairs: Iterable of two-word pairs
                    :param n_components: Forwarded to ``PCA(n_components=...)``
                    :return: The fitted ``PCA`` object
                    """
                    centered_vectors = []

                    for first_word, second_word in definitional_pairs:
                        first_vector = normalize(self[first_word])
                        second_vector = normalize(self[second_word])

                        midpoint = (first_vector + second_vector) / 2

                        centered_vectors.extend((first_vector - midpoint,
                                                 second_vector - midpoint))

                    pca = PCA(n_components=n_components)
                    pca.fit(centered_vectors)

                    if self._verbose:
                        # One row per component, numbered from 1.
                        rows = enumerate(pca.explained_variance_ratio_,
                                         start=1)
                        column_names = ['Principal Component',
                                        'Explained Variance Ratio']
                        print(tabulate(rows, headers=column_names))

                    return pca
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |     # TODO: add the SVD method from section 6 step 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |     # It seems there is a mistake there, I think it is the same as PCA | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |     # just with replacing it with SVD | 
            
                                                                                                            
                            
            
                                    
            
            
                def _identify_direction(self, positive_end, negative_end,
                                        definitional, method='pca'):
                    """Identify the bias direction and store it on the instance.

                    The result is written to ``self.direction``; ``positive_end`` and
                    ``negative_end`` are stored on the instance as well.

                    :param str positive_end: Word marking the positive end of
                                             the direction
                    :param str negative_end: Word marking the negative end of
                                             the direction
                    :param definitional: Words that define the direction. Its expected
                                         form depends on ``method``:

                                         - ``'single'``: indexable, where items 0 and 1
                                           are the two words whose normalized vectors
                                           are subtracted.
                                         - ``'sum'``: indexable, where items 0 and 1 are
                                           iterables of words; each group is summed.
                                         - ``'pca'``: passed as is to
                                           ``self._identify_subspace_by_pca``.
                    :param str method: One of ``DIRECTION_METHODS``
                    :raises ValueError: If ``method`` is not in ``DIRECTION_METHODS``
                                        or if both ends are equal
                    :raises RuntimeError: If, with the ``'pca'`` method, the explained
                                          variance ratio of the first principal
                                          component is below ``FIRST_PC_THRESHOLD``
                    """
                    if method not in DIRECTION_METHODS:
                        raise ValueError(
                            'method should be one of {}, {} was given'
                            .format(DIRECTION_METHODS, method))

                    if positive_end == negative_end:
                        # The adjacent literals are concatenated, so the separating
                        # space must be part of one of them.
                        raise ValueError('positive_end and negative_end '
                                         'should be different, and not the same "{}"'
                                         .format(positive_end))
                    if self._verbose:
                        print('Identify direction using {} method...'.format(method))

                    # Stays None if `method` is accepted by DIRECTION_METHODS but is
                    # not handled by any of the branches below.
                    direction = None

                    if method == 'single':
                        # Orientation is determined by the order of `definitional`.
                        direction = normalize(normalize(self[definitional[0]])
                                              - normalize(self[definitional[1]]))

                    elif method == 'sum':
                        group1_sum_vector = np.sum([self[word]
                                                    for word in definitional[0]], axis=0)
                        group2_sum_vector = np.sum([self[word]
                                                    for word in definitional[1]], axis=0)

                        diff_vector = (normalize(group1_sum_vector)
                                       - normalize(group2_sum_vector))

                        direction = normalize(diff_vector)

                    elif method == 'pca':
                        # NOTE(review): 10 is presumably the number of principal
                        # components to compute - confirm against
                        # _identify_subspace_by_pca.
                        pca = self._identify_subspace_by_pca(definitional, 10)
                        if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                            raise RuntimeError('The Explained variance '
                                               'of the first principal component '
                                               'should be at least {}, but it is {}'
                                               .format(FIRST_PC_THRESHOLD,
                                                       pca.explained_variance_ratio_[0]))
                        direction = pca.components_[0]

                        # The sign of a principal component is arbitrary, so flip it
                        # when it points from the positive end to the negative end.
                        ends_diff_projection = cosine_similarity((self[positive_end]
                                                                  - self[negative_end]),
                                                                 direction)
                        if ends_diff_projection < 0:
                            direction = -direction  # pylint: disable=invalid-unary-operand-type

                    self.direction = direction
                    self.positive_end = positive_end
                    self.negative_end = negative_end
            
                                                                                                            
                            
            
                                    
            
            
                | 263 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def project_on_direction(self, word):
                    """Score a single word against the identified direction.

                    The score is the first (and only) value returned by
                    ``self.model.cosine_similarities`` when called with
                    ``self.direction`` and the vector of the word.

                    :param str word: The word to project
                    :return float: The projection scalar
                    """
                    # Validate that a direction is available before using it.
                    self._is_direction_identified()

                    word_vector = self[word]
                    # A one-item list goes in, so a one-item result comes out.
                    similarities = self.model.cosine_similarities(self.direction,
                                                                  [word_vector])
                    return similarities[0]
            
                                                                                                            
                            
            
                                    
            
            
                | 277 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _calc_projection_scores(self, words):
                    """Build a table of words with their projection on the direction.

                    :param list words: The words to project
                    :return: :class:`pandas.DataFrame` with the columns ``word`` and
                             ``projection``, sorted by ``projection`` in
                             descending order
                    """
                    self._is_direction_identified()

                    words_df = pd.DataFrame({'word': words})

                    # TODO: scoring all the vectors with a single
                    # cosine_similarities call might be faster than one call per word
                    scored_df = words_df.assign(
                        projection=words_df['word'].apply(self.project_on_direction))

                    return scored_df.sort_values('projection', ascending=False)
            
                                                                                                            
                            
            
                                    
            
            
                | 289 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def calc_projection_data(self, words):
                    """
                    Calculate projection, projected and rejected vectors of a words list.

                    Each word's vector is normalized and then passed, together with
                    ``self.direction``, to ``project_params``.

                    :param list words: List of words
                    :return: :class:`pandas.DataFrame` with one row per word and
                             the columns ``word``, ``vector`` (the original,
                             non-normalized vector), ``projection``,
                             ``projected_vector`` and ``rejected_vector``
                    """
                    # Check once up front, like the other projection methods, instead
                    # of relying on a per-word project_on_direction call whose result
                    # was discarded.
                    self._is_direction_identified()

                    projection_data = []
                    for word in words:
                        vector = self[word]

                        # NOTE(review): project_params is assumed to return
                        # (projection scalar, projected vector, rejected vector),
                        # as implied by this unpacking - confirm against its definition.
                        (projection,
                         projected_vector,
                         rejected_vector) = project_params(normalize(vector),
                                                           self.direction)

                        projection_data.append({'word': word,
                                                'vector': vector,
                                                'projection': projection,
                                                'projected_vector': projected_vector,
                                                'rejected_vector': rejected_vector})

                    return pd.DataFrame(projection_data)
            
                                                                                                            
                            
            
                                    
            
            
                | 316 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 317 |  |  |     def plot_projection_scores(self, words, n_extreme=10, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 318 |  |  |                                ax=None, axis_projection_step=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 319 |  |  |         """Plot the projection scalar of words on the direction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 320 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 321 |  |  |         :param list words: The words tor project | 
            
                                                                                                            
                            
            
                                    
            
            
                | 322 |  |  |         :param int or None n_extreme: The number of extreme words to show | 
            
                                                                                                            
                            
            
                                    
            
            
                | 323 |  |  |         :return: The ax object of the plot | 
            
                                                                                                            
                            
            
                                    
            
            
                | 324 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 325 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 326 |  |  |         self._is_direction_identified() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 327 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 328 |  |  |         projections_df = self._calc_projection_scores(words) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 329 |  |  |         projections_df['projection'] = projections_df['projection'].round(2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 330 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 331 |  |  |         if n_extreme is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 332 |  |  |             projections_df = take_two_sides_extreme_sorted(projections_df, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 333 |  |  |                                                            n_extreme=n_extreme) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 334 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 335 |  |  |         if ax is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 336 |  |  |             _, ax = plt.subplots(1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 337 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 338 |  |  |         if axis_projection_step is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 339 |  |  |             axis_projection_step = 0.1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 340 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 341 |  |  |         cmap = plt.get_cmap('RdBu') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 342 |  |  |         projections_df['color'] = ((projections_df['projection'] + 0.5) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 343 |  |  |                                    .apply(cmap)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 344 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 345 |  |  |         most_extream_projection = (projections_df['projection'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 346 |  |  |                                    .abs() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 347 |  |  |                                    .max() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 348 |  |  |                                    .round(1)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 349 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 350 |  |  |         sns.barplot(x='projection', y='word', data=projections_df, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 351 |  |  |                     palette=projections_df['color']) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 352 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 353 |  |  |         plt.xticks(np.arange(-most_extream_projection, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 354 |  |  |                              most_extream_projection + axis_projection_step, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 355 |  |  |                              axis_projection_step)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 356 |  |  |         plt.title('← {} {} {} →'.format(self.negative_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 357 |  |  |                                         ' ' * 20, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 358 |  |  |                                         self.positive_end)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 359 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 360 |  |  |         plt.xlabel('Direction Projection') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 361 |  |  |         plt.ylabel('Words') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 362 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 363 |  |  |         return ax | 
            
                                                                                                            
                            
            
                                    
            
            
                | 364 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 365 |  |  |     def plot_dist_projections_on_direction(self, word_groups, ax=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 366 |  |  |         """Plot the projection scalars distribution on the direction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 367 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 368 |  |  |         :param dict word_groups word: The groups to projects | 
            
                                                                                                            
                            
            
                                    
            
            
                | 369 |  |  |         :return float: The ax object of the plot | 
            
                                                                                                            
                            
            
                                    
            
            
                | 370 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 371 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 372 |  |  |         if ax is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 373 |  |  |             _, ax = plt.subplots(1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 374 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 375 |  |  |         names = sorted(word_groups.keys()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 376 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 377 |  |  |         for name in names: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 378 |  |  |             words = word_groups[name] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 379 |  |  |             label = '{} (#{})'.format(name, len(words)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 380 |  |  |             vectors = [self[word] for word in words] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 381 |  |  |             projections = self.model.cosine_similarities(self.direction, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 382 |  |  |                                                          vectors) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 383 |  |  |             sns.distplot(projections, hist=False, label=label, ax=ax) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 384 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 385 |  |  |         plt.axvline(0, color='k', linestyle='--') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 386 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 387 |  |  |         plt.title('← {} {} {} →'.format(self.negative_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 388 |  |  |                                         ' ' * 20, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 389 |  |  |                                         self.positive_end)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 390 |  |  |         plt.xlabel('Direction Projection') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 391 |  |  |         plt.ylabel('Density') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 392 |  |  |         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 393 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 394 |  |  |         return ax | 
            
                                                                                                            
                            
            
                                    
            
            
                | 395 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 396 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 397 |  |  |     def _calc_bias_across_word_embeddings(cls, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 398 |  |  |                                           word_embedding_bias_dict, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 399 |  |  |                                           words): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 400 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 401 |  |  |         Calculate to projections and rho of words for two word embeddings. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 402 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 403 |  |  |         :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects | 
            
                                                                                                            
                            
            
                                    
            
            
                | 404 |  |  |                                                as values, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 405 |  |  |                                                and their names as keys. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 406 |  |  |         :param list words: Words to be projected. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 407 |  |  |         :return tuple: Projections and spearman rho. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 408 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 409 |  |  |         # pylint: disable=W0212 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 410 |  |  |         assert len(word_embedding_bias_dict) == 2, 'Support only in two'\ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 411 |  |  |                                                     'word embeddings' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 412 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 413 |  |  |         intersection_words = [word for word in words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 414 |  |  |                               if all(word in web | 
            
                                                                                                            
                            
            
                                    
            
            
                | 415 |  |  |                                      for web in (word_embedding_bias_dict | 
            
                                                                                                            
                            
            
                                    
            
            
                | 416 |  |  |                                                  .values()))] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 417 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 418 |  |  |         projections = {name: web._calc_projection_scores(intersection_words)['projection']  # pylint: disable=C0301 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 419 |  |  |                        for name, web in word_embedding_bias_dict.items()} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 420 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 421 |  |  |         df = pd.DataFrame(projections) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 422 |  |  |         df.index = intersection_words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 423 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 424 |  |  |         rho, _ = spearmanr(*df.transpose().values) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 425 |  |  |         return df, rho | 
            
                                                                                                            
                            
            
                                    
            
            
                | 426 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @classmethod
                def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict,
                                                     words, ax=None, scatter_kwargs=None):
                    """
                    Plot the projections of same words of two word embeddings.

                    Each word becomes one point: its x coordinate is taken from the
                    column of the first embedding and its y coordinate from the
                    column of the second one, as returned by
                    ``_calc_bias_across_word_embeddings``.

                    :param dict word_embedding_bias_dict: ``WordsEmbeddingBias`` objects
                                                           as values,
                                                           and their names as keys.
                                                           Must hold exactly two items.
                    :param list words: Words to be projected.
                    :param ax: The axes to draw on. If ``None``, a new figure
                               with a single axes is created.
                    :param scatter_kwargs: Kwargs for matplotlib.pylab.scatter.
                    :type scatter_kwargs: dict or None
                    :return: The ax object of the plot
                    """
                    # pylint: disable=W0212

                    df, rho = cls._calc_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301
                                                                    words)

                    if ax is None:
                        _, ax = plt.subplots(1)

                    if scatter_kwargs is None:
                        scatter_kwargs = {}

                    # Unpacking raises ValueError unless there are exactly two names.
                    name1, name2 = word_embedding_bias_dict.keys()

                    ax.scatter(x=name1, y=name2, data=df, **scatter_kwargs)

                    # All decorations go through ``ax`` (not ``plt``), so a
                    # caller-supplied axes that is not pyplot's current axes
                    # is still the one that gets titled, labelled and scaled.
                    ax.set_title('Bias Across Word Embeddings '
                                 '(Spearman Rho = {:0.2f})'.format(rho))

                    # The direction ends are read from the first embedding and
                    # reused for both axis labels.
                    # NOTE(review): presumably both embeddings share the same
                    # ends -- confirm against callers.
                    negative_end = word_embedding_bias_dict[name1].negative_end
                    positive_end = word_embedding_bias_dict[name1].positive_end
                    ax.set_xlabel('← {}     {}     {} →'.format(negative_end,
                                                                name1,
                                                                positive_end))
                    ax.set_ylabel('← {}     {}     {} →'.format(negative_end,
                                                                name2,
                                                                positive_end))

                    # Use one common range for both axes so the two projections
                    # are visually comparable.
                    ax_min = round_to_extreme(df.values.min())
                    ax_max = round_to_extreme(df.values.max())
                    ax.set_xlim(ax_min, ax_max)
                    ax.set_ylim(ax_min, ax_max)

                    return ax
            
                                                                                                            
                            
            
                                    
            
            
                | 474 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 475 |  |  |     # TODO: refactor for speed and clarity | 
            
                                                                                                            
                            
            
                                    
            
            
                | 476 |  |  |     def generate_analogies(self, n_analogies=100, seed='ends', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 477 |  |  |                            multiple=False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 478 |  |  |                            delta=1., restrict_vocab=30000, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 479 |  |  |                            unrestricted=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 480 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 481 |  |  |         Generate analogies based on a seed vector. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 482 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 483 |  |  |         x - y ~ seed vector. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 484 |  |  |         or a:x::b:y when a-b ~ seed vector. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 485 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 486 |  |  |         The seed vector can be defined by two word ends, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 487 |  |  |         or by the bias direction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 488 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 489 |  |  |         ``delta`` is used for semantically coherent. Default vale of 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 490 |  |  |         corresponds to an angle <= pi/3. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 491 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 492 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 493 |  |  |         There is criticism regarding generating analogies | 
            
                                                                                                            
                            
            
                                    
            
            
                | 494 |  |  |         when used with `unstricted=False` and not ignoring analogies | 
            
                                                                                                            
                            
            
                                    
            
            
                | 495 |  |  |         with `match` column equal to `False`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 496 |  |  |         Tolga's technique of generating analogies, as implemented in this | 
            
                                                                                                            
                            
            
                                    
            
            
                | 497 |  |  |         method, is limited inherently to analogies with x != y, which may | 
            
                                                                                                            
                            
            
                                    
            
            
                | 498 |  |  |         be force "fake" bias analogies. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 499 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 500 |  |  |         See: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 501 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 502 |  |  |         - Nissim, M., van Noord, R., van der Goot, R. (2019). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 503 |  |  |           `Fair is Better than Sensational: Man is to Doctor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 504 |  |  |           as Woman is to Doctor <https://arxiv.org/abs/1905.09866>`_. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 505 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 506 |  |  |         :param seed: The definition of the seed vector. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 507 |  |  |                      Either by a tuple of two word ends, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 508 |  |  |                      or by `'ends` for the pre-defined ends | 
            
                                                                                                            
                            
            
                                    
            
            
                | 509 |  |  |                      or by `'direction'` for the pre-defined direction vector. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 510 |  |  |         :param int n_analogies: Number of analogies to generate. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 511 |  |  |         :param bool multiple: Whether to allow multiple appearances of a word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 512 |  |  |                               in the analogies. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 513 |  |  |         :param float delta: Threshold for semantic similarity. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 514 |  |  |                             The maximal distance between x and y. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 515 |  |  |         :param int restrict_vocab: The vocabulary size to use. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 516 |  |  |         :param bool unrestricted: Whether to validate the generated analogies | 
            
                                                                                                            
                            
            
                                    
            
            
                | 517 |  |  |                                   with unrestricted `most_similar`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 518 |  |  |         :return: Data Frame of analogies (x, y), their distances, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 519 |  |  |                  and their cosine similarity scores | 
            
                                                                                                            
                            
            
                                    
            
            
                | 520 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 521 |  |  |         # pylint: disable=C0301,R0914 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 522 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 523 |  |  |         if not unrestricted: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 524 |  |  |             warnings.warn('Not Using unrestricted most_similar ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 525 |  |  |                           'may introduce fake biased analogies.') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 526 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 527 |  |  |         (seed_vector, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 528 |  |  |          positive_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 529 |  |  |          negative_end) = get_seed_vector(seed, self) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 530 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 531 |  |  |         restrict_vocab_vectors = self.model.vectors[:restrict_vocab] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 532 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 533 |  |  |         normalized_vectors = (restrict_vocab_vectors | 
            
                                                                                                            
                            
            
                                    
            
            
                | 534 |  |  |                               / np.linalg.norm(restrict_vocab_vectors, axis=1)[:, None]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 535 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 536 |  |  |         pairs_distances = euclidean_distances(normalized_vectors, normalized_vectors) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 537 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 538 |  |  |         # `pairs_distances` must be not-equal to zero | 
            
                                                                                                            
                            
            
                                    
            
            
                | 539 |  |  |         # otherwise, x-y will be the zero vector, and every cosine similarity | 
            
                                                                                                            
                            
            
                                    
            
            
                | 540 |  |  |         # will be equal to zero. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 541 |  |  |         # This causes the **limitation** of this method, which requires different | 
            
                                                                                                            
                            
            
                                    
            
            
                | 542 |  |  |         # words for x and y. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 543 |  |  |         pairs_mask = (pairs_distances < delta) & (pairs_distances != 0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 544 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 545 |  |  |         pairs_indices = np.array(np.nonzero(pairs_mask)).T | 
            
                                                                                                            
                            
            
                                    
            
            
                | 546 |  |  |         x_vectors = np.take(normalized_vectors, pairs_indices[:, 0], axis=0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 547 |  |  |         y_vectors = np.take(normalized_vectors, pairs_indices[:, 1], axis=0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 548 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 549 |  |  |         x_minus_y_vectors = x_vectors - y_vectors | 
            
                                                                                                            
                            
            
                                    
            
            
                | 550 |  |  |         normalized_x_minus_y_vectors = (x_minus_y_vectors | 
            
                                                                                                            
                            
            
                                    
            
            
                | 551 |  |  |                                         / np.linalg.norm(x_minus_y_vectors, axis=1)[:, None]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 552 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 553 |  |  |         cos_distances = normalized_x_minus_y_vectors @ seed_vector | 
            
                                                                                                            
                            
            
                                    
            
            
                | 554 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 555 |  |  |         sorted_cos_distances_indices = np.argsort(cos_distances)[::-1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 556 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 557 |  |  |         sorted_cos_distances_indices_iter = iter(sorted_cos_distances_indices) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 558 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 559 |  |  |         analogies = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 560 |  |  |         generated_words_x = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 561 |  |  |         generated_words_y = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 562 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 563 |  |  |         while len(analogies) < n_analogies: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 564 |  |  |             cos_distance_index = next(sorted_cos_distances_indices_iter) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 565 |  |  |             paris_index = pairs_indices[cos_distance_index] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 566 |  |  |             word_x, word_y = [self.model.index2word[index] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 567 |  |  |                               for index in paris_index] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 568 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 569 |  |  |             if multiple or (not multiple | 
            
                                                                                                            
                            
            
                                    
            
            
                | 570 |  |  |                             and (word_x not in generated_words_x | 
            
                                                                                                            
                            
            
                                    
            
            
                | 571 |  |  |                                  and word_y not in generated_words_y)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 572 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 573 |  |  |                 analogy = ({positive_end: word_x, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 574 |  |  |                             negative_end: word_y, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 575 |  |  |                             'score': cos_distances[cos_distance_index], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 576 |  |  |                             'distance': pairs_distances[tuple(paris_index)]}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 577 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 578 |  |  |                 generated_words_x.add(word_x) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 579 |  |  |                 generated_words_y.add(word_y) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 580 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 581 |  |  |                 if unrestricted: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 582 |  |  |                     most_x = next(word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 583 |  |  |                                   for word, _ in most_similar(self.model, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 584 |  |  |                                                               [word_y, positive_end], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 585 |  |  |                                                               [negative_end])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 586 |  |  |                     most_y = next(word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 587 |  |  |                                   for word, _ in most_similar(self.model, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 588 |  |  |                                                               [word_x, negative_end], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 589 |  |  |                                                               [positive_end])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 590 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 591 |  |  |                     analogy['most_x'] = most_x | 
            
                                                                                                            
                            
            
                                    
            
            
                | 592 |  |  |                     analogy['most_y'] = most_y | 
            
                                                                                                            
                            
            
                                    
            
            
                | 593 |  |  |                     analogy['match'] = ((word_x == most_x) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 594 |  |  |                                         and (word_y == most_y)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 595 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 596 |  |  |                 analogies.append(analogy) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 597 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 598 |  |  |         df = pd.DataFrame(analogies) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 599 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 600 |  |  |         columns = [positive_end, negative_end, 'distance', 'score'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 601 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 602 |  |  |         if unrestricted: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 603 |  |  |             columns.extend(['most_x', 'most_y', 'match']) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 604 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 605 |  |  |         df = df[columns] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 606 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 607 |  |  |         return df | 
            
                                                                                                            
                            
            
                                    
            
            
                | 608 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 609 |  |  |     def calc_direct_bias(self, neutral_words, c=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 610 |  |  |         """Calculate the direct bias. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 611 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 612 |  |  |         Based on the projection of neutral words on the direction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 613 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 614 |  |  |         :param list neutral_words: List of neutral words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 615 |  |  |         :param c: Strictness of bias measuring | 
            
                                                                                                            
                            
            
                                    
            
            
                | 616 |  |  |         :type c: float or None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 617 |  |  |         :return: The direct bias | 
            
                                                                                                            
                            
            
                                    
            
            
                | 618 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 619 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 620 |  |  |         if c is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 621 |  |  |             c = 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 622 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 623 |  |  |         projections = self._calc_projection_scores(neutral_words)['projection'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 624 |  |  |         direct_bias_terms = np.abs(projections) ** c | 
            
                                                                                                            
                            
            
                                    
            
            
                | 625 |  |  |         direct_bias = direct_bias_terms.sum() / len(neutral_words) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 626 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 627 |  |  |         return direct_bias | 
            
                                                                                                            
                            
            
                                    
            
            
                | 628 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 629 |  |  |     def calc_indirect_bias(self, word1, word2): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 630 |  |  |         """Calculate the indirect bias between two words. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 631 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 632 |  |  |         Based on the amount of shared projection of the words on the direction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 633 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 634 |  |  |         Also called PairBias. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 635 |  |  |         :param str word1: First word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 636 |  |  |         :param str word2: Second word | 
            
                                                                                                            
                            
            
                                    
            
            
                | 637 |  |  |         :type c: float or None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 638 |  |  |         :return: The indirect bias between the two words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 639 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 640 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 641 |  |  |         self._is_direction_identified() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 642 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 643 |  |  |         vector1 = normalize(self[word1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 644 |  |  |         vector2 = normalize(self[word2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 645 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 646 |  |  |         perpendicular_vector1 = reject_vector(vector1, self.direction) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 647 |  |  |         perpendicular_vector2 = reject_vector(vector2, self.direction) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 648 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 649 |  |  |         inner_product = vector1 @ vector2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 650 |  |  |         perpendicular_similarity = cosine_similarity(perpendicular_vector1, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 651 |  |  |                                                      perpendicular_vector2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 652 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 653 |  |  |         indirect_bias = ((inner_product - perpendicular_similarity) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 654 |  |  |                          / inner_product) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 655 |  |  |         return indirect_bias | 
            
                                                                                                            
                            
            
                                    
            
            
                | 656 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def generate_closest_words_indirect_bias(self,
                                                         neutral_positive_end,
                                                         neutral_negative_end,
                                                         words=None, n_extreme=5):
                    """
                    Generate closest words to a neutral direction and their indirect bias.

                    The direction of the neutral words is used to find
                    the most extreme words.
                    The indirect bias is calculated between the most extreme words
                    and the closest end.

                    :param str neutral_positive_end: A word that defines the positive
                                                     side of the neutral direction.
                    :param str neutral_negative_end: A word that defines the negative
                                                     side of the neutral direction.
                    :param words: Iterable of words to project on the neutral
                                  direction. It is required even though the
                                  signature keeps ``None`` as the default for
                                  backward compatibility. Any iterable is accepted,
                                  including one-shot iterators.
                    :param int n_extreme: The number for the most extreme words
                                          (positive and negative) to show.
                    :return: Data Frame indexed by ``(end, word)`` with the columns
                             ``projection`` and ``indirect_bias``.
                    :raises TypeError: If ``words`` is ``None``.
                    """

                    # Fail early with an explicit message instead of the unrelated
                    # "'NoneType' object is not iterable" raised further down.
                    if words is None:
                        raise TypeError("words must be an iterable of words to project "
                                        "on the neutral direction, not None")

                    neutral_direction = normalize(self[neutral_positive_end]
                                                  - self[neutral_negative_end])

                    # Traverse `words` exactly once so that generators and other
                    # one-shot iterables are not exhausted before the rows are built.
                    projections = [{'word': word,
                                    'projection': (normalize(self[word])
                                                   @ neutral_direction)}
                                   for word in words]

                    df = (pd.DataFrame(projections)
                          .sort_values('projection', ascending=False))

                    # NOTE(review): presumably keeps the `n_extreme` rows from each side
                    # of the sorted frame and labels them in the 'end' column with the
                    # matching end word - confirm against take_two_sides_extreme_sorted.
                    df = take_two_sides_extreme_sorted(df, n_extreme,
                                                       'end',
                                                       neutral_positive_end,
                                                       neutral_negative_end)

                    # The indirect bias is measured between each word and its 'end' word.
                    df['indirect_bias'] = df.apply(lambda r:
                                                   self.calc_indirect_bias(r['word'],
                                                                           r['end']),
                                                   axis=1)

                    df = df.set_index(['end', 'word'])
                    df = df[['projection', 'indirect_bias']]

                    return df
            
                                                                                                            
                            
            
                                    
            
            
                | 703 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 704 |  |  |     def _extract_neutral_words(self, specific_words): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 705 |  |  |         extended_specific_words = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 706 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 707 |  |  |         # because or specific_full data was trained on partial word embedding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 708 |  |  |         for word in specific_words: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 709 |  |  |             extended_specific_words.add(word) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 710 |  |  |             extended_specific_words.add(word.lower()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 711 |  |  |             extended_specific_words.add(word.upper()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 712 |  |  |             extended_specific_words.add(word.title()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 713 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 714 |  |  |         neutral_words = [word for word in self.model.vocab | 
            
                                                                                                            
                            
            
                                    
            
            
                | 715 |  |  |                          if word not in extended_specific_words] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 716 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 717 |  |  |         return neutral_words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 718 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 719 |  |  |     def _neutralize(self, neutral_words): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 720 |  |  |         self._is_direction_identified() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 721 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 722 |  |  |         if self._verbose: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 723 |  |  |             neutral_words_iter = tqdm(neutral_words) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 724 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 725 |  |  |             neutral_words_iter = iter(neutral_words) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 726 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 727 |  |  |         for word in neutral_words_iter: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 728 |  |  |             neutralized_vector = reject_vector(self[word], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 729 |  |  |                                                self.direction) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 730 |  |  |             update_word_vector(self.model, word, neutralized_vector) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 731 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 732 |  |  |         self.model.init_sims(replace=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 733 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 734 |  |  |     def _equalize(self, equality_sets): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 735 |  |  |         # pylint: disable=R0914 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 736 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 737 |  |  |         self._is_direction_identified() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 738 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 739 |  |  |         if self._verbose: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 740 |  |  |             words_data = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 741 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 742 |  |  |         for equality_set_index, equality_set_words in enumerate(equality_sets): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 743 |  |  |             equality_set_vectors = [normalize(self[word]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 744 |  |  |                                     for word in equality_set_words] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 745 |  |  |             center = np.mean(equality_set_vectors, axis=0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 746 |  |  |             (projected_center, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 747 |  |  |              rejected_center) = project_reject_vector(center, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 748 |  |  |                                                       self.direction) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 749 |  |  |             scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 750 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 751 |  |  |             for word, vector in zip(equality_set_words, equality_set_vectors): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 752 |  |  |                 projected_vector = project_vector(vector, self.direction) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 753 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 754 |  |  |                 projected_part = normalize(projected_vector - projected_center) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 755 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 756 |  |  |                 # In the code it is different of Bolukbasi | 
            
                                                                                                            
                            
            
                                    
            
            
                | 757 |  |  |                 # It behaves the same only for equality_sets | 
            
                                                                                                            
                            
            
                                    
            
            
                | 758 |  |  |                 # with size of 2 (pairs) - not sure! | 
            
                                                                                                            
                            
            
                                    
            
            
                | 759 |  |  |                 # However, my code is the same as the article | 
            
                                                                                                            
                            
            
                                    
            
            
                | 760 |  |  |                 # equalized_vector = rejected_center + scaling * self.direction | 
            
                                                                                                            
                            
            
                                    
            
            
                | 761 |  |  |                 # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 762 |  |  |                 # For pairs, projected_part_vector1 == -projected_part_vector2, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 763 |  |  |                 # and this is the same as | 
            
                                                                                                            
                            
            
                                    
            
            
                | 764 |  |  |                 # projected_part_vector1 == self.direction | 
            
                                                                                                            
                            
            
                                    
            
            
                | 765 |  |  |                 equalized_vector = rejected_center + scaling * projected_part | 
            
                                                                                                            
                            
            
                                    
            
            
                | 766 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 767 |  |  |                 update_word_vector(self.model, word, equalized_vector) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 768 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 769 |  |  |                 if self._verbose: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 770 |  |  |                     words_data.append({ | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 771 |  |  |                         'equality_set_index': equality_set_index, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 772 |  |  |                         'word': word, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 773 |  |  |                         'scaling': scaling, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 774 |  |  |                         'projected_scalar': vector @ self.direction, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 775 |  |  |                         'equalized_projected_scalar': (equalized_vector | 
            
                                                                                                            
                            
            
                                    
            
            
                | 776 |  |  |                                                        @ self.direction), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 777 |  |  |                     }) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 778 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 779 |  |  |         if self._verbose: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 780 |  |  |             print('Equalize Words Data ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 781 |  |  |                   '(all equal for 1-dim bias space (direction):') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 782 |  |  |             words_data_df = (pd.DataFrame(words_data) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 783 |  |  |                              .set_index(['equality_set_index', 'word'])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 784 |  |  |             print(tabulate(words_data_df, headers='keys')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 785 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 786 |  |  |         self.model.init_sims(replace=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 787 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def debias(self, method='hard', neutral_words=None, equality_sets=None,
                           inplace=True):
                    """Debias the word embedding.

                    With ``method='neutralize'`` only the neutralize step is run;
                    with ``method='hard'`` the neutralize step is followed by the
                    equalize step. For any other accepted method, neither step is
                    run by this method.

                    :param str method: The method of debiasing,
                                       one of ``DEBIAS_METHODS``.
                    :param list neutral_words: List of neutral words
                                               for the neutralize step
                    :param list equality_sets: List of equality sets,
                                               for the equalize step.
                                               The sets represent the direction.
                    :param bool inplace: Whether to debias the object inplace
                                         or return a new one
                    :return: ``None`` when `inplace` is true, otherwise a debiased
                             deep copy of this object (this object is left
                             untouched).
                    :raises ValueError: If `method` is not one of
                                        ``DEBIAS_METHODS``.

                    .. warning::

                      After calling `debias`,
                      all the vectors of the word embedding
                      will be normalized to unit length.

                    """

                    # Validate first, so that a rejected call never pays for
                    # deep-copying the whole object.
                    if method not in DEBIAS_METHODS:
                        raise ValueError('method should be one of {}, {} was given'.format(
                            DEBIAS_METHODS, method))

                    # pylint: disable=W0212
                    bias_word_embedding = self if inplace else copy.deepcopy(self)

                    if method in ('hard', 'neutralize'):
                        if self._verbose:
                            print('Neutralize...')
                        bias_word_embedding._neutralize(neutral_words)

                    # Equalizing only happens for 'hard', after neutralizing.
                    if method == 'hard':
                        if self._verbose:
                            print('Equalize...')
                        bias_word_embedding._equalize(equality_sets)

                    return None if inplace else bias_word_embedding
            
                                                                                                            
                            
            
                                    
            
            
                | 833 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def evaluate_word_embedding(self,
                                            kwargs_word_pairs=None,
                                            kwargs_word_analogies=None):
                    """
                    Evaluate this object's model on word pairs and analogies tasks.

                    Delegates to the function of the same name found at module
                    scope, passing ``self.model`` as the word embedding.

                    :param kwargs_word_pairs: Kwargs for
                                              evaluate_word_pairs
                                              method.
                    :type kwargs_word_pairs: dict or None
                    :param kwargs_word_analogies: Kwargs for
                                                  evaluate_word_analogies
                                                  method.
                    :type kwargs_word_analogies: dict or None
                    :return: Tuple of :class:`pandas.DataFrame`
                             for the evaluation results.
                    """

                    # Inside a method body this name resolves at module scope,
                    # so this is a delegation and not a recursive call.
                    evaluation_args = (self.model,
                                       kwargs_word_pairs,
                                       kwargs_word_analogies)
                    return evaluate_word_embedding(*evaluation_args)
            
                                                                                                            
                            
            
                                    
            
            
                | 856 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def learn_full_specific_words(self, seed_specific_words,
                                              max_non_specific_examples=None,
                                              debug=None):
                    """Learn specific words given a list of seed specific words.

                    Using Linear SVM.

                    The training set is built from the vocabulary: every word
                    found in ``seed_specific_words`` is a positive example, and
                    the first ``max_non_specific_examples`` other words met
                    while iterating the vocabulary are negative examples.
                    Training vectors are scaled to unit length, the classifier
                    is fitted, and it is then applied to every vocabulary word.

                    :param list seed_specific_words: List of seed specific words
                    :param int max_non_specific_examples: The number of
                        non-specific words to sample for training. ``None``
                        means ``MAX_NON_SPECIFIC_EXAMPLES``.
                    :param bool debug: If true, the training data is returned
                        as well. ``None`` means ``False``.
                    :return: List of learned specific words and the classifier
                             object; when ``debug`` is true, the training
                             matrix ``X`` and labels ``y`` are appended.
                    """

                    if debug is None:
                        debug = False

                    if max_non_specific_examples is None:
                        max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

                    # Build the lookup once: testing membership in a list is a
                    # linear scan that would be repeated for every vocab word.
                    seed_lookup = frozenset(seed_specific_words)

                    data = []
                    non_specific_example_count = 0

                    for word in self.model.vocab:
                        is_specific = word in seed_lookup

                        if not is_specific:
                            non_specific_example_count += 1
                            # Negative examples are capped; positives never are.
                            if (non_specific_example_count
                                    > max_non_specific_examples):
                                continue

                        data.append((self[word], is_specific))

                    # NOTE(review): this reseeds NumPy's *global* RNG, which is
                    # visible to callers. Kept as-is for backward compatibility.
                    np.random.seed(RANDOM_STATE)
                    np.random.shuffle(data)

                    X, y = zip(*data)

                    X = np.array(X)
                    # Scale every training vector (row) to unit L2 norm.
                    X /= np.linalg.norm(X, axis=1)[:, None]

                    y = np.array(y).astype('int')

                    clf = LinearSVC(C=1, class_weight='balanced',
                                    random_state=RANDOM_STATE)

                    clf.fit(X, y)

                    # Classify the vocabulary in batches instead of calling
                    # ``predict`` once per word; batching also bounds the size
                    # of the temporary matrix for large vocabularies.
                    batch_size = 4096
                    vocab_words = list(self.model.vocab)
                    full_specific_words = []

                    for start in range(0, len(vocab_words), batch_size):
                        batch = vocab_words[start:start + batch_size]
                        vectors = np.array([normalize(self[word])
                                            for word in batch])
                        labels = clf.predict(vectors)
                        full_specific_words.extend(
                            word for word, label in zip(batch, labels)
                            if label)

                    if not debug:
                        return full_specific_words, clf

                    return full_specific_words, clf, X, y
            
                                                                                                            
                            
            
                                    
            
            
                | 913 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _plot_most_biased_one_cluster(self,
                                                  most_biased_neutral_words, y_bias,
                                                  random_state=1, ax=None):
                    """Plot the vectors of the given words as one cluster plot.

                    Each word is looked up in ``self.model``; the resulting
                    vectors are handed, together with ``y_bias``,
                    ``random_state`` and ``ax``, to
                    ``plot_clustering_as_classification``.

                    :param most_biased_neutral_words: Iterable of words to
                        look up in the model.
                    :param y_bias: Passed through unchanged to the plotting
                        helper.
                    :param random_state: Forwarded to the plotting helper.
                    :param ax: Forwarded to the plotting helper.
                    :return: Whatever ``plot_clustering_as_classification``
                             returns.
                    """
                    word_vectors = []
                    for biased_word in most_biased_neutral_words:
                        word_vectors.append(self.model[biased_word])

                    plot_result = plot_clustering_as_classification(
                        word_vectors,
                        y_bias,
                        random_state=random_state,
                        ax=ax)
                    return plot_result
            
                                                                                                            
                                                                
            
                                    
            
            
                | 924 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def compute_factual_association(self, factual_properity):
                    """Compute association of a factual property to the projection.

                    Inspired by WEFAT (Word-Embedding Factual Association Test),
                    but it is not the same:
                    - Caliskan, A., Bryson, J. J., & Narayanan, A. (2017).
                    `Semantics derived automatically
                    from language corpora contain human-like biases
                    <http://opus.bath.ac.uk/55288/>`_.
                    Science, 356(6334), 183-186.

                    In a future version, the WEFAT will also be implemented.

                    If a word doesn't exist in the word embedding,
                    then it will be filtered out.

                    For example, in :class:`ethically.we.bias.GenderBiasWE`,
                    the default factual property is the percentage of female
                    in various occupations
                    from the Labor Force Statistics of 2017 Population Survey,
                    Taken from: https://arxiv.org/abs/1804.06876

                    :param dict factual_properity: Dictionary of words
                                                   and their factual values.
                    :return: Tuple of the ``pearsonr`` result (Pearson r and
                             pvalue) and a dictionary mapping each retained
                             word to its factual value
                             and its projection on the bias direction.
                    :raises ValueError: If none of the given words exists
                                        in the word embedding.
                    """

                    points = {word: (value, self.project_on_direction(word))
                              for word, value in factual_properity.items()
                              if word in self.model}

                    # Without this guard the unpacking below fails with an
                    # opaque "not enough values to unpack" ValueError.
                    if not points:
                        raise ValueError('None of the words in'
                                         ' factual_properity exists'
                                         ' in the word embedding.')

                    x, y = zip(*points.values())

                    return pearsonr(x, y), points
            
                                                                                                            
                            
            
                                    
            
            
                | 961 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 962 |  |  |     def plot_factual_association(self, factual_properity, ax=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 963 |  |  |         """Plot association of a factual property to the projection. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 964 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 965 |  |  |         See: :meth:`BiasWordEmbedding.compute_factual_association` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 966 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 967 |  |  |         :param dict factual_properity: Dictionary of words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 968 |  |  |                                        and their factual values. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 969 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 970 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 971 |  |  |         result = self.compute_factual_association(factual_properity) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 972 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 973 |  |  |         (r, pvalue), points = result | 
            
                                                                                                            
                            
            
                                    
            
            
                | 974 |  |  |         x, y = zip(*points.values()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 975 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 976 |  |  |         if ax is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 977 |  |  |             _, ax = plt.subplots(1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 978 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 979 |  |  |         ax.scatter(x, y) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 980 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 981 |  |  |         plt.title('Assocsion between Factual Property' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 982 |  |  |                   'and Projection on Direction ' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 983 |  |  |                   '(Pearson R = {:0.2f} ; pvalue={:0.2f})' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 984 |  |  |                   .format(r, pvalue)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 985 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 986 |  |  |         plt.xlabel('Factual Property') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 987 |  |  |         plt.ylabel('Projection on Direction') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 988 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 989 |  |  |         return ax | 
            
                                                                                                            
                            
            
                                    
            
            
                | 990 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @staticmethod
                def plot_most_biased_clustering(biased, debiased,
                                                seed='ends', n_extreme=500,
                                                random_state=1):
                    """Plot clustering as classification of biased neutral words.

                    The neutral words of ``biased`` are ranked by the projection
                    of their normalized vectors on the seed vector. The
                    ``n_extreme`` lowest ranked and the ``n_extreme`` highest
                    ranked words are then plotted twice, side by side: once with
                    the biased embedding and once with the debiased one.

                    :param biased: Biased word embedding of
                                   :class:`~ethically.we.bias.BiasWordEmbedding`.
                    :param debiased: Debiased word embedding of
                                     :class:`~ethically.we.bias.BiasWordEmbedding`.
                    :param seed: The definition of the seed vector.
                                Either by a tuple of two word ends,
                                or by ``'ends'`` for the pre-defined ends
                                or by ``'direction'`` for
                                the pre-defined direction vector.
                    :param int n_extreme: The number of extreme biased
                                          neutral words to use from each side.
                                          Must be at least 1.
                    :param random_state: Forwarded unchanged to the clustering
                                         plot of each embedding.
                    :return: Tuple of list of ax objects of the plot,
                             and a dictionary with the most positive
                             and negative words, keyed by the positive
                             and negative ends of ``biased``.
                    :raises AssertionError: If the two embeddings do not share
                                            the same positive and negative ends.
                    :raises ValueError: If ``n_extreme`` is smaller than 1.

                    Based on:

                    - Gonen, H., & Goldberg, Y. (2019).
                      `Lipstick on a Pig:
                      Debiasing Methods Cover up Systematic Gender Biases
                      in Word Embeddings But do not Remove
                      Them <https://arxiv.org/abs/1903.03862>`_.
                      arXiv preprint arXiv:1903.03862.

                    - https://github.com/gonenhila/gender_bias_lipstick
                    """
                    # pylint: disable=protected-access,too-many-locals,line-too-long

                    assert biased.positive_end == debiased.positive_end, \
                        'Positive ends should be the same.'
                    assert biased.negative_end == debiased.negative_end, \
                        'Negative ends should be the same.'

                    # A zero count would make ``[-n_extreme:]`` select the whole
                    # list, and a negative one would select almost everything,
                    # so reject both up front with a clear message.
                    if n_extreme < 1:
                        raise ValueError('n_extreme should be at least 1.')

                    seed_vector, _, _ = get_seed_vector(seed, biased)

                    # Pair each projection with its word, so sorting the pairs
                    # orders the words from the most negative projection
                    # to the most positive one.
                    neutral_word_projections = sorted(
                        (normalize(biased[word]) @ seed_vector, word)
                        for word in biased._data['neutral_words']
                    )

                    _, most_negative_words = zip(*neutral_word_projections[:n_extreme])
                    _, most_positive_words = zip(*neutral_word_projections[-n_extreme:])

                    most_biased_neutral_words = most_negative_words + most_positive_words

                    # Count the labels from the words actually selected, so
                    # they stay aligned with the words even when fewer than
                    # ``n_extreme`` neutral words are available.
                    y_bias = ([False] * len(most_negative_words)
                              + [True] * len(most_positive_words))

                    _, axes = plt.subplots(1, 2, figsize=(20, 5))

                    # Same words and labels for both embeddings; only the vectors differ.
                    panels = zip(axes, ('Biased', 'Debiased'), (biased, debiased))
                    for ax, label, embedding in panels:
                        accuracy = embedding._plot_most_biased_one_cluster(
                            most_biased_neutral_words,
                            y_bias,
                            random_state=random_state,
                            ax=ax)
                        ax.set_title('{} - Accuracy={}'.format(label, accuracy))

                    return axes, {biased.positive_end: most_positive_words,
                                  biased.negative_end: most_negative_words}
            
                                                                                                            
                            
            
                                    
            
            
                | 1064 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1065 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1066 |  |  | class GenderBiasWE(BiasWordEmbedding): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1067 |  |  |     """Measure and adjust the Gender Bias in English Word Embedding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1068 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1069 |  |  |     :param model: Word embedding model of ``gensim.model.KeyedVectors`` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1070 |  |  |     :param bool only_lower: Whether the word embedding contrains | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1071 |  |  |                             only lower case words | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1072 |  |  |     :param bool verbose: Set verbosity | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1073 |  |  |     :param bool to_normalize: Whether to normalize all the vectors | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1074 |  |  |                               (recommended!) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1075 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1076 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1077 |  |  |     def __init__(self, model, only_lower=False, verbose=False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1078 |  |  |                  identify_direction=True, to_normalize=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1079 |  |  |         super().__init__(model=model, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1080 |  |  |                          only_lower=only_lower, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1081 |  |  |                          verbose=verbose, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1082 |  |  |                          to_normalize=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1083 |  |  |         self._initialize_data() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1084 |  |  |         if identify_direction: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1085 |  |  |             self._identify_direction('she', 'he', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1086 |  |  |                                      self._data['definitional_pairs'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1087 |  |  |                                      'pca') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1088 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1089 |  |  |     def _initialize_data(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1090 |  |  |         self._data = copy.deepcopy(BOLUKBASI_DATA['gender']) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1091 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1092 |  |  |         if not self.only_lower: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1093 |  |  |             self._data['specific_full_with_definitional_equalize'] = \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1094 |  |  |                 generate_words_forms(self | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1095 |  |  |                                      ._data['specific_full_with_definitional_equalize'])  # pylint: disable=C0301 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1096 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1097 |  |  |         for key in self._data['word_group_keys']: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1098 |  |  |             self._data[key] = (self._filter_words_by_model(self | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1099 |  |  |                                                            ._data[key])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1101 |  |  |         self._data['neutral_words'] = self._extract_neutral_words(self | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1102 |  |  |                                                                   ._data['specific_full_with_definitional_equalize'])  # pylint: disable=C0301 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1103 |  |  |         self._data['neutral_words'].sort() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1104 |  |  |         self._data['word_group_keys'].append('neutral_words') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1105 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1106 |  |  |     def plot_projection_scores(self, words='professions', n_extreme=10, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1107 |  |  |                                ax=None, axis_projection_step=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1108 |  |  |         if words == 'professions': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1109 |  |  |             words = self._data['profession_names'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1110 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1111 |  |  |         return super().plot_projection_scores(words, n_extreme, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1112 |  |  |                                               ax, axis_projection_step) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1113 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1114 |  |  |     def plot_dist_projections_on_direction(self, word_groups='bolukbasi', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1115 |  |  |                                            ax=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1116 |  |  |         if word_groups == 'bolukbasi': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1117 |  |  |             word_groups = {key: self._data[key] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1118 |  |  |                            for key in self._data['word_group_keys']} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1119 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1120 |  |  |         return super().plot_dist_projections_on_direction(word_groups, ax) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1121 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1122 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1123 |  |  |     def plot_bias_across_word_embeddings(cls, word_embedding_bias_dict, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1124 |  |  |                                          ax=None, scatter_kwargs=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1125 |  |  |         # pylint: disable=W0221 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1126 |  |  |         words = BOLUKBASI_DATA['gender']['neutral_profession_names'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1127 |  |  |         # TODO: is it correct for inheritance of class method? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1128 |  |  |         super(cls, cls).plot_bias_across_word_embeddings(word_embedding_bias_dict,  # pylint: disable=C0301 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1129 |  |  |                                                          words, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1130 |  |  |                                                          ax, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1131 |  |  |                                                          scatter_kwargs) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1133 |  |  |     def calc_direct_bias(self, neutral_words='professions', c=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1134 |  |  |         if isinstance(neutral_words, str) and neutral_words == 'professions': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1135 |  |  |             return super().calc_direct_bias( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1136 |  |  |                 self._data['neutral_profession_names'], c) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1137 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1138 |  |  |             return super().calc_direct_bias(neutral_words) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1139 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1140 |  |  |     def generate_closest_words_indirect_bias(self, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1141 |  |  |                                              neutral_positive_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1142 |  |  |                                              neutral_negative_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1143 |  |  |                                              words='professions', n_extreme=5): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1144 |  |  |         # pylint: disable=C0301 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1145 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1146 |  |  |         if words == 'professions': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1147 |  |  |             words = self._data['profession_names'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1148 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1149 |  |  |         return super().generate_closest_words_indirect_bias(neutral_positive_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1150 |  |  |                                                             neutral_negative_end, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1151 |  |  |                                                             words, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1152 |  |  |                                                             n_extreme=n_extreme) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1153 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1154 |  |  |     def debias(self, method='hard', neutral_words=None, equality_sets=None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1155 |  |  |                inplace=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1156 |  |  |         # pylint: disable=C0301 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1157 |  |  |         if method in ['hard', 'neutralize']: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1158 |  |  |             if neutral_words is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1159 |  |  |                 neutral_words = self._data['neutral_words'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1160 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1161 |  |  |         if method == 'hard' and equality_sets is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1162 |  |  |             equality_sets = self._data['definitional_pairs'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1163 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1164 |  |  |             if not self.only_lower: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1165 |  |  |                 assert all(len(equality_set) == 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1166 |  |  |                            for equality_set in equality_sets), 'currently supporting only equality pairs if only_lower is False' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1167 |  |  |                 # TODO: refactor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1168 |  |  |                 equality_sets = {(candidate1, candidate2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1169 |  |  |                                  for word1, word2 in equality_sets | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1170 |  |  |                                  for candidate1, candidate2 in zip(generate_one_word_forms(word1), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1171 |  |  |                                                                    generate_one_word_forms(word2))} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1172 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1173 |  |  |         return super().debias(method, neutral_words, equality_sets, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1174 |  |  |                               inplace) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1175 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1176 |  |  |     def learn_full_specific_words(self, seed_specific_words='bolukbasi', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1177 |  |  |                                   max_non_specific_examples=None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1178 |  |  |                                   debug=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1179 |  |  |         if seed_specific_words == 'bolukbasi': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1180 |  |  |             seed_specific_words = self._data['specific_seed'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1181 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1182 |  |  |         return super().learn_full_specific_words(seed_specific_words, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1183 |  |  |                                                  max_non_specific_examples, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1184 |  |  |                                                  debug) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1185 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1186 |  |  |     def compute_factual_association(self, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1187 |  |  |                                     factual_properity=OCCUPATION_FEMALE_PRECENTAGE):  # pylint: disable=line-too-long | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1188 |  |  |         return super().compute_factual_association(factual_properity) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1189 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1190 |  |  |     def plot_factual_association(self, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1191 |  |  |                                  factual_properity=OCCUPATION_FEMALE_PRECENTAGE,  # pylint: disable=line-too-long | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1192 |  |  |                                  ax=None): | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 1193 |  |  |         return super().plot_factual_association(factual_properity, ax) | 
            
                                                        
            
                                    
            
            
                | 1194 |  |  |  |