Passed
Pull Request — dev (#30)
by Shlomi
02:40
created

responsibly.we.benchmark   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 166
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 62
dl 0
loc 166
rs 10
c 0
b 0
f 0
wmc 9

5 Functions

Rating   Name   Duplication   Size   Complexity  
A _prepare_word_pairs_file() 0 4 1
A evaluate_word_embedding() 0 19 1
A _get_data_resource_path() 0 4 1
A evaluate_word_pairs() 0 36 3
A evaluate_word_analogies() 0 29 3
1
"""
2
Evaluate word embedding by standard benchmarks.
3
4
Reference:
5
    - https://github.com/kudkudak/word-embeddings-benchmarks
6
7
8
Word Pairs Tasks
9
~~~~~~~~~~~~~~~~
10
11
1. The WordSimilarity-353 Test Collection
12
   http://www.cs.technion.ac.il/~gabr/resources/data/wordsim353/
13
14
2. Rubenstein, H., and Goodenough, J. 1965. Contextual correlates of synonymy
15
   https://www.seas.upenn.edu/~hansens/conceptSim/
16
17
3. Stanford Rare Word (RW) Similarity Dataset
18
   https://nlp.stanford.edu/~lmthang/morphoNLM/
19
20
4. The Word Relatedness Mturk-771 Test Collection
21
   http://www2.mta.ac.il/~gideon/datasets/mturk_771.html
22
23
5. The MEN Test Collection
24
   http://clic.cimec.unitn.it/~elia.bruni/MEN.html
25
26
6. SimLex-999
27
   https://fh295.github.io/simlex.html
28
29
7. TR9856
30
   https://www.research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_TR9856.v2.zip
31
32
33
Analogies Tasks
34
~~~~~~~~~~~~~~~
35
36
1. Google Analogies (subset of WordRep)
37
   https://code.google.com/archive/p/word2vec/source
38
39
2. MSR - Syntactic Analogies
40
   http://research.microsoft.com/en-us/projects/rnn/
41
42
"""
43
44
import os
45
46
import pandas as pd
47
from pkg_resources import resource_filename
48
49
50
# Word pairs benchmarks: task name -> data file name.
WORD_PAIRS_TASKS = {
    'WS353': 'wordsim353.tsv',
    'RG65': 'RG_word.tsv',
    'RW': 'rw.tsv',
    'Mturk': 'MTURK-771.tsv',
    'MEN': 'MEN_dataset_natural_form_full.tsv',
    'SimLex999': 'SimLex-999.tsv',
    'TR9856': 'TermRelatednessResults.tsv',
}

# Analogies benchmarks: task name -> data file name.
ANALOGIES_TASKS = {
    'MSR-syntax': 'MSR-syntax.txt',
    'Google': 'questions-words.txt',
}

# Order of the metrics reported for each word pairs task.
# NOTE: 'ratio_unkonwn_words' is misspelled, but it has to match the key
# that evaluate_word_pairs() stores in its results, so it is kept as-is.
PAIR_WORDS_EVALUATION_FIELDS = [
    'pearson_r',
    'pearson_pvalue',
    'spearman_r',
    'spearman_pvalue',
    'ratio_unkonwn_words',
]
64
65
66
def _get_data_resource_path(filename):
    """
    Return the path of a benchmark data file shipped with this package.

    :param filename: Name of a file inside the ``data/benchmark``
                     directory, relative to this module's package.
    :return: Path as resolved by :func:`pkg_resources.resource_filename`.
    """
    relative_path = os.path.join('data', 'benchmark', filename)
    return resource_filename(__name__, relative_path)
70
71
72
def _prepare_word_pairs_file(src, dst, delimiter='\t'):
    """
    Transform formats of word pairs files to tsv.

    Reads ``src`` as a header-less delimited file and writes the columns
    labelled 0 through 2 to ``dst``, without index or header row.

    :param src: Path of the file to read.
    :param dst: Path of the file to write.
    :param delimiter: Field separator used for both reading and writing.
    """
    raw_table = pd.read_csv(src, header=None, delimiter=delimiter)
    # With header=None the column labels are integers, and a label-based
    # .loc slice is inclusive, so `:2` keeps columns 0, 1 and 2.
    leading_columns = raw_table.loc[:, :2]
    leading_columns.to_csv(dst, sep=delimiter, index=False, header=False)
76
77
78
def evaluate_word_pairs(model, kwargs_word_pairs=None):
    """
    Evaluate a word embedding on every word pairs task.

    Each file listed in ``WORD_PAIRS_TASKS`` is passed to
    ``model.evaluate_word_pairs``, and the Pearson and Spearman
    statistics plus the ratio of unknown words are collected per task.

    :param model: Word embedding. It must provide an
                  ``evaluate_word_pairs(path, **kwargs)`` method that
                  returns three values (presumably a gensim
                  ``KeyedVectors``-like object - confirm with callers).
    :param kwargs_word_pairs: Extra keyword arguments forwarded to
                              ``model.evaluate_word_pairs``.
    :type kwargs_word_pairs: dict or None
    :return: :class:`pandas.DataFrame` of evaluation results, one row
             per task and one column per metric, rounded to 3 decimals.
    """
    extra_kwargs = {} if kwargs_word_pairs is None else kwargs_word_pairs

    scores_by_task = {}
    for task_name, data_file in WORD_PAIRS_TASKS.items():
        data_path = _get_data_resource_path(data_file)
        pearson_result, spearman_result, unknown_ratio = (
            model.evaluate_word_pairs(data_path, **extra_kwargs))

        scores_by_task[task_name] = {
            'pearson_r': pearson_result[0],
            'pearson_pvalue': pearson_result[1],
            'spearman_r': spearman_result.correlation,
            'spearman_pvalue': spearman_result.pvalue,
            'ratio_unkonwn_words': unknown_ratio,
        }

    # Tasks start out as columns; fix the metric order first, then flip
    # the table so that every task becomes a row.
    metrics_by_task = pd.DataFrame(scores_by_task)
    ordered_metrics = metrics_by_task.reindex(PAIR_WORDS_EVALUATION_FIELDS)
    return ordered_metrics.transpose().round(3)
114
115
116
def evaluate_word_analogies(model, kwargs_word_analogies=None):
    """
    Evaluate word analogies tasks.

    Each file listed in ``ANALOGIES_TASKS`` is passed to
    ``model.evaluate_word_analogies`` and the overall score of every
    task is collected.

    :param model: Word embedding. It must provide an
                  ``evaluate_word_analogies(path, **kwargs)`` method
                  returning a pair whose first item is the overall score.
    :param kwargs_word_analogies: Kwargs for
                                  evaluate_word_analogies
                                  method.
    :type kwargs_word_analogies: dict or None
    :return: :class:`pandas.DataFrame` of evaluation results, one row
             per task with a single ``score`` column, rounded to
             3 decimals. Empty if there are no analogies tasks.
    """

    if kwargs_word_analogies is None:
        kwargs_word_analogies = {}

    results = {}

    for name, filename in ANALOGIES_TASKS.items():
        path = _get_data_resource_path(filename)
        overall_score, _ = model.evaluate_word_analogies(
            path, **kwargs_word_analogies)

        results[name] = {'score': overall_score}

    # Build the frame once, after the loop: this avoids rebuilding it on
    # every iteration and keeps the result defined (as an empty frame)
    # even when there are no tasks to iterate over.
    return (pd.DataFrame(results)
            .transpose()
            .round(3))
0 ignored issues
show
introduced by
The variable df does not seem to be defined in case the for loop on line 133 is not entered. Are you sure this can never be the case?
Loading history...
145
146
147
def evaluate_word_embedding(model,
                            kwargs_word_pairs=None,
                            kwargs_word_analogies=None):
    """
    Evaluate word pairs tasks and word analogies tasks.

    :param model: Word embedding.
    :param kwargs_word_pairs: Kwargs for
                              evaluate_word_pairs
                              method.
    :type kwargs_word_pairs: dict or None
    :param kwargs_word_analogies: Kwargs for
                                  evaluate_word_analogies
                                  method.
    :type kwargs_word_analogies: dict or None
    :return: Tuple of two DataFrames: the word pairs results first,
             then the word analogies results.
    """
    word_pairs_results = evaluate_word_pairs(model, kwargs_word_pairs)
    analogies_results = evaluate_word_analogies(model,
                                                kwargs_word_analogies)
    return word_pairs_results, analogies_results
166