Passed
Push — master ( fe038f...170db5 )
by Shlomi
03:29 queued 01:43
created

ethically.we.benchmark.evaluate_word_pairs()   A

Complexity

Conditions 3

Size

Total Lines 36
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 20
nop 2
dl 0
loc 36
rs 9.4
c 0
b 0
f 0
1
"""
2
Evaluate word embeddings by standard benchmarks.
3
4
Reference: https://github.com/kudkudak/word-embeddings-benchmarks
5
6
7
Word Pairs Tasks
8
9
1. The WordSimilarity-353 Test Collection
10
   http://www.cs.technion.ac.il/~gabr/resources/data/wordsim353/
11
12
2. Rubenstein, H., and Goodenough, J. 1965. Contextual correlates of synonymy
13
   https://www.seas.upenn.edu/~hansens/conceptSim/
14
15
3. Stanford Rare Word (RW) Similarity Dataset
16
   https://nlp.stanford.edu/~lmthang/morphoNLM/
17
18
4. The Word Relatedness Mturk-771 Test Collection
19
   http://www2.mta.ac.il/~gideon/datasets/mturk_771.html
20
21
5. The MEN Test Collection
22
   http://clic.cimec.unitn.it/~elia.bruni/MEN.html
23
24
6. SimLex-999
25
   https://fh295.github.io/simlex.html
26
27
7. TR9856
28
   https://www.research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_TR9856.v2.zip
29
30
31
Analogies Tasks
32
33
1. Google Analogies (subset of WordRep)
34
   https://code.google.com/archive/p/word2vec/source
35
36
2. MSR - Syntactic Analogies
37
   http://research.microsoft.com/en-us/projects/rnn/
38
39
"""
40
41
import os
42
import warnings
43
44
import pandas as pd
45
from pkg_resources import resource_filename
46
47
48
# Ignore FutureWarning inside this context only.
# NOTE(review): the block contains nothing but the filter call, and
# catch_warnings() restores the previous filters on exit, so no later
# statement is covered -- presumably an import used to live here; confirm.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=FutureWarning)
50
51
52
# Word-pairs benchmark name -> data file name
# (resolved to a path through get_data_resource_path).
WORD_PAIRS_TASKS = {
    'WS353': 'wordsim353.tsv',
    'RG65': 'RG_word.tsv',
    'RW': 'rw.tsv',
    'Mturk': 'MTURK-771.tsv',
    'MEN': 'MEN_dataset_natural_form_full.tsv',
    'SimLex999': 'SimLex-999.tsv',
    'TR9856': 'TermRelatednessResults.tsv',
}

# Analogies benchmark name -> data file name.
ANALOGIES_TASKS = {
    'MSR-syntax': 'MSR-syntax.txt',
    'Google': 'questions-words.txt',
}

# Field order used when laying out the word-pairs results table.
# The spelling of the last entry is a runtime key shared with
# evaluate_word_pairs and is kept exactly as-is.
PAIR_WORDS_EVALUATION_FIELDS = [
    'pearson_r',
    'pearson_pvalue',
    'spearman_r',
    'spearman_pvalue',
    'ratio_unkonwn_words',
]
66
67
68
def get_data_resource_path(filename):
    """Return the path of a benchmark data file shipped with this package.

    :param filename: Name of the file inside ``data/benchmark``.
    :return: Path as resolved by ``resource_filename`` for this module.
    """
    relative_path = os.path.join('data', 'benchmark', filename)
    return resource_filename(__name__, relative_path)
72
73
74
def prepare_word_pairs_file(src, dst, delimiter='\t'):
    """Copy the leading columns of a word pairs file into a new file.

    The source is read without a header row, and the columns labelled
    0 through 2 are written to ``dst`` without header or index.

    NOTE: the output is written with the same ``delimiter`` as the
    input, so it is tab-separated only when ``delimiter`` is a tab.

    :param src: Path of the source word pairs file.
    :param dst: Path of the file to write.
    :param delimiter: Field separator used for reading and writing.
    """
    table = pd.read_csv(src, delimiter=delimiter, header=None)
    leading_columns = table.loc[:, :2]
    leading_columns.to_csv(dst, header=False, index=False, sep=delimiter)
78
79
80
def evaluate_word_pairs(model, kwargs_word_pairs=None):
    """
    Run every word pairs benchmark against a words embedding.

    For each entry of ``WORD_PAIRS_TASKS`` the model's own
    ``evaluate_word_pairs`` method is called on the task data file,
    and its Pearson, Spearman and unknown-words results are collected.

    :param model: Words embedding exposing an
                  ``evaluate_word_pairs(path, **kwargs)`` method.
    :param kwargs_word_pairs: Kwargs forwarded to the model's
                              evaluate_word_pairs method.
    :type kwargs_word_pairs: dict or None
    :return: DataFrame with one row per task and the columns of
             ``PAIR_WORDS_EVALUATION_FIELDS``, rounded to 3 decimals.
    """
    call_kwargs = {} if kwargs_word_pairs is None else kwargs_word_pairs

    scores_by_task = {}
    for task_name, task_file in WORD_PAIRS_TASKS.items():
        task_path = get_data_resource_path(task_file)
        outcome = model.evaluate_word_pairs(task_path, **call_kwargs)
        pearson_result, spearman_result, unknown_ratio = outcome

        scores_by_task[task_name] = {
            'pearson_r': pearson_result[0],
            'pearson_pvalue': pearson_result[1],
            'spearman_r': spearman_result.correlation,
            'spearman_pvalue': spearman_result.pvalue,
            'ratio_unkonwn_words': unknown_ratio,
        }

    # Tasks start out as columns; fix the field order, then flip so
    # that each task becomes a row.
    table = pd.DataFrame(scores_by_task)
    table = table.reindex(PAIR_WORDS_EVALUATION_FIELDS)
    return table.transpose().round(3)
116
117
118
def evaluate_word_analogies(model, kwargs_word_analogies=None):
    """
    Evaluate word analogies tasks.

    Calls the model's own ``evaluate_word_analogies`` method once per
    entry of ``ANALOGIES_TASKS`` and keeps the overall score of each
    task.

    :param model: Words embedding exposing an
                  ``evaluate_word_analogies(path, **kwargs)`` method
                  that returns a pair whose first item is the
                  overall score.
    :param kwargs_word_analogies: Kwargs for
                                  evaluate_word_analogies
                                  method.
    :type kwargs_word_analogies: dict or None
    :return: DataFrame of evaluation results, one row per task with
             its ``score`` rounded to 3 decimals; an empty DataFrame
             when no task is configured.
    """

    if kwargs_word_analogies is None:
        kwargs_word_analogies = {}

    results = {}

    for name, filename in ANALOGIES_TASKS.items():
        path = get_data_resource_path(filename)
        overall_score, _ = model.evaluate_word_analogies(
            path, **kwargs_word_analogies)

        results[name] = {'score': overall_score}

    # Build the frame once, after the loop: doing it inside the loop
    # left ``df`` unbound when there were no tasks and rebuilt the
    # whole frame on every iteration.
    df = (pd.DataFrame(results)
          .transpose()
          .round(3))

    return df
0 ignored issues
show
introduced by
The variable df does not seem to be defined in case the for loop on line 135 is not entered. Are you sure this can never be the case?
Loading history...
147
148
149
def evaluate_words_embedding(model,
                             kwargs_word_pairs=None,
                             kwargs_word_analogies=None):
    """
    Run both the word pairs and the word analogies evaluations.

    :param model: Words embedding.
    :param kwargs_word_pairs: Kwargs for
                              evaluate_word_pairs
                              method.
    :type kwargs_word_pairs: dict or None
    :param kwargs_word_analogies: Kwargs for
                                  evaluate_word_analogies
                                  method.
    :type kwargs_word_analogies: dict or None
    :return: Tuple ``(word pairs results, word analogies results)``,
             each as returned by the corresponding evaluate function.
    """
    pairs_results = evaluate_word_pairs(model, kwargs_word_pairs)
    analogies_results = evaluate_word_analogies(model, kwargs_word_analogies)
    return pairs_results, analogies_results
168