1
|
|
|
import e2edutch.metrics |
|
|
|
|
2
|
|
|
import math |
|
|
|
|
3
|
|
|
|
4
|
|
|
|
5
|
|
|
def test_f1():
    """Check ``e2edutch.metrics.f1`` on a non-degenerate input.

    The same numerator (10) is passed twice, with denominators 100 and 50.
    The expected value 2/15 is the harmonic mean of 10/100 and 10/50.
    """
    num = 10
    p_den = 100
    r_den = 50
    f1 = e2edutch.metrics.f1(num, p_den, num, r_den)
    assert math.isclose(f1, 2.0 / 15)
11
|
|
|
|
12
|
|
|
|
13
|
|
|
def test_f1_zero():
    """Check that ``e2edutch.metrics.f1`` yields 0 when every count is 0."""
    num = 0
    p_den = 0
    r_den = 0
    f1 = e2edutch.metrics.f1(num, p_den, num, r_den)
    # math.isclose(x, 0) with its default tolerances (abs_tol=0) only holds
    # when x is exactly 0, so the exact comparison states the same
    # requirement directly and matches the other zero checks in this file.
    assert f1 == 0
19
|
|
|
|
20
|
|
|
|
21
|
|
|
def test_corefevaluator():
    """Check that a ``CorefEvaluator`` reports zero scores without data.

    F1, recall and precision are checked right after construction and again
    after an update with empty inputs.
    """
    evaluator = e2edutch.metrics.CorefEvaluator()
    assert evaluator.get_f1() == 0
    assert evaluator.get_recall() == 0
    assert evaluator.get_precision() == 0

    # empty update
    evaluator.update([], [], {}, {})
    assert evaluator.get_f1() == 0
    assert evaluator.get_recall() == 0
    assert evaluator.get_precision() == 0
32
|
|
|
|
33
|
|
|
|
34
|
|
|
def test_evaluate_documents():
    """Check that every metric scores an empty document list as all zeros.

    ``evaluate_documents`` is called once per metric function with an empty
    list, and each of the three returned values must equal 0.
    """
    metrics = [
        e2edutch.metrics.muc,
        e2edutch.metrics.b_cubed,
        e2edutch.metrics.lea,
        e2edutch.metrics.ceafe,
    ]
    documents = []
    for metric in metrics:
        p, r, f = e2edutch.metrics.evaluate_documents(documents, metric)
        # Name the metric in the failure message; inside a loop the bare
        # assert would not reveal which metric produced the bad value.
        label = getattr(metric, "__name__", repr(metric))
        assert p == 0, label
        assert r == 0, label
        assert f == 0, label
45
|
|
|
|
46
|
|
|
|
47
|
|
|
def example_clusters():
    """Build a small predicted/gold clustering fixture.

    Nine mentions are created as spans ``(2 * k, 2 * k + 1)`` for ``k`` in
    ``0..8`` and bound to the names ``a`` .. ``i``.

    Returns:
        A tuple ``(pred_clusters, gold_clusters, mentions_to_gold)``. The
        first two are lists of tuples of mentions; the last maps every
        mention that occurs in a gold cluster to that gold cluster.
        Mentions ``h`` and ``i`` appear only in the predicted clusters and
        ``e`` only in the gold clusters.
    """
    a, b, c, d, e, f, g, h, i = ((k * 2, k * 2 + 1) for k in range(9))
    pred_clusters = [(a, b), (c, d), (f, g, h, i)]
    gold_clusters = [(a, b, c), (d, e, f, g)]
    mentions_to_gold = {m: cl for cl in gold_clusters for m in cl}
    return pred_clusters, gold_clusters, mentions_to_gold
54
|
|
|
|
55
|
|
|
|
56
|
|
|
def test_muc():
    """Check the ``muc`` ratio on a self-match and on the shared example.

    ``muc`` is expected to return a ``(numerator, denominator)`` pair; the
    test asserts on their quotient.
    """
    # A single two-mention cluster scored against itself.
    clusters = [((0, 1), (2, 3))]
    mentions_to_gold = {m: cl for cl in clusters for m in cl}
    num, den = e2edutch.metrics.muc(clusters, mentions_to_gold)
    assert math.isclose(num / den, 1)

    # Predicted clusters of the shared fixture against its gold mapping.
    clusters, _, mentions_to_gold = example_clusters()
    num, den = e2edutch.metrics.muc(clusters, mentions_to_gold)
    assert math.isclose(num / den, 0.4)
65
|
|
|
|
66
|
|
|
|
67
|
|
|
def test_b_cubed():
    """Check the ``b_cubed`` ratio on a self-match and the shared example.

    ``b_cubed`` is expected to return a ``(numerator, denominator)`` pair;
    the test asserts on their quotient.
    """
    # A single two-mention cluster scored against itself.
    clusters = [((0, 1), (2, 3))]
    mentions_to_gold = {m: cl for cl in clusters for m in cl}
    num, den = e2edutch.metrics.b_cubed(clusters, mentions_to_gold)
    assert math.isclose(num / den, 1)

    # Predicted clusters of the shared fixture against its gold mapping.
    clusters, _, mentions_to_gold = example_clusters()
    num, den = e2edutch.metrics.b_cubed(clusters, mentions_to_gold)
    assert math.isclose(num / den, 0.5)
76
|
|
|
|
77
|
|
|
|
78
|
|
|
def test_ceafe():
    """Check the first ratio returned by ``ceafe``.

    ``ceafe`` is unpacked into four values here. Only the quotient of the
    first two is asserted (presumably the precision numerator and
    denominator -- confirm against ``e2edutch.metrics``); the remaining two
    are deliberately ignored.
    """
    # Identical predicted and gold clusters.
    clusters = [((0, 1), (2, 3))]
    p_num, p_den, _, _ = e2edutch.metrics.ceafe(clusters, clusters)
    assert math.isclose(p_num / p_den, 1)

    # Shared fixture; the expected value is only given to two decimals,
    # hence the absolute tolerance.
    clusters, gold_clusters, _ = example_clusters()
    p_num, p_den, _, _ = e2edutch.metrics.ceafe(clusters, gold_clusters)
    assert math.isclose(p_num / p_den, 0.43, abs_tol=0.005)
86
|
|
|
|