1
|
|
|
import e2edutch.conll |
|
|
|
|
2
|
|
|
import os |
|
|
|
|
3
|
|
|
__here__ = os.path.dirname(os.path.realpath(__file__)) |
4
|
|
|
|
5
|
|
|
|
6
|
|
|
def test_get_doc_key():
    """get_doc_key('test', '1') must produce the key 'test.p.1'."""
    assert e2edutch.conll.get_doc_key('test', '1') == 'test.p.1'
9
|
|
|
|
10
|
|
|
|
11
|
|
|
def test_get_doc_key_nopart():
    """get_doc_key called with only a name must return that name unchanged."""
    assert e2edutch.conll.get_doc_key('test') == 'test'
14
|
|
|
|
15
|
|
|
|
16
|
|
|
def test_get_prediction_map():
    """Check the per-document maps built for one cluster with one mention.

    The input holds a single document ('doc1') whose only cluster contains
    the mention (0, 1). The result must have one entry for that document,
    made of three maps: the first lists cluster 0 at position 0, the second
    lists cluster 0 at position 1, and the third is empty.
    """
    clusters_by_doc = {'doc1': [[(0, 1)]]}

    result = e2edutch.conll.get_prediction_map(clusters_by_doc)
    assert len(result) == 1

    doc_maps = result['doc1']
    assert len(doc_maps) == 3

    starts, ends, words = doc_maps
    assert starts[0] == [0]
    assert ends[1] == [0]
    assert words == {}
25
|
|
|
|
26
|
|
|
|
27
|
|
|
def test_predictions_to_brackets():
    """Check the bracket strings produced for two sentences and two clusters.

    Cluster 0 is the one-token mention (1, 1); cluster 1 is the two-token
    mention (4, 5). The expected values below show that the mention indices
    count tokens across both sentences, so (4, 5) lands on tokens 1 and 2 of
    the second sentence.
    """
    tokens = [
        ['Een', 'zin', '.'],
        ['Nog', 'een', 'zin'],
    ]
    clusters = [[(1, 1)], [(4, 5)]]

    brackets = e2edutch.conll.clusters_to_brackets(tokens, clusters)

    # One row of bracket strings per sentence, one string per token.
    assert len(brackets) == 2
    first_row = brackets[0]
    assert len(first_row) == 3

    # A one-token mention is opened and closed on the same token.
    assert first_row[1] == '(0)'

    # A two-token mention opens on its first token and closes on its last.
    second_row = brackets[1]
    assert second_row[1] == '(1'
    assert second_row[2] == '1)'
37
|
|
|
|
38
|
|
|
|
39
|
|
|
def test_output_conll():
    """Write one single-sentence document with output_conll and check it.

    The output is written inside a fresh temporary directory instead of a
    fixed path under /tmp: the test then cannot collide with another test
    (or a parallel run) using the same file name, does not depend on a
    POSIX-style /tmp, and leaves nothing behind. Every file handle is
    closed by a ``with`` block.
    """
    import tempfile  # function-scope import keeps this test self-contained

    sentences = {'doc1.p.1': [['Dit', 'is', 'een', 'test', '.']]}
    predictions = {'doc1.p.1': [[(0, 0), (2, 3)]]}

    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = os.path.join(tmp_dir, 'tmp.conll')

        with open(output_file, 'w') as fout:
            e2edutch.conll.output_conll(fout, sentences, predictions)
        assert os.path.exists(output_file)

        # Read everything back before the temporary directory is removed.
        with open(output_file) as written:
            content = written.readlines()

    nonempty = [line for line in content if line.strip() != '']
    # Presumably 5 token lines plus the begin/end document markers -- confirm
    # against output_conll if this count ever changes.
    assert len(nonempty) == 7
    assert content[0].strip() == '#begin document (doc1); part 1'
50
|
|
|
|
51
|
|
|
|
52
|
|
|
def test_output_conll_align():
    """Align predictions with the bundled data/test.conll and check the output.

    The input is read from the 'data' directory next to this test module.
    The output is written inside a fresh temporary directory instead of a
    fixed path under /tmp: the test then cannot collide with another test
    (or a parallel run) using the same file name, does not depend on a
    POSIX-style /tmp, and leaves nothing behind. Every file handle is
    closed by a ``with`` block.
    """
    import tempfile  # function-scope import keeps this test self-contained

    input_file = os.path.join(__here__, 'data', 'test.conll')
    predictions = {'doc1.p.1': [[(0, 0), (2, 3)]]}

    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = os.path.join(tmp_dir, 'tmp.conll')

        with open(input_file) as fin, open(output_file, 'w') as fout:
            e2edutch.conll.output_conll_align(fin, fout, predictions)

        # Read everything back before the temporary directory is removed.
        with open(output_file) as written:
            content = written.readlines()

    nonempty = [line for line in content if line.strip() != '']
    # The expected count depends on the contents of data/test.conll, which
    # is not visible from this module -- confirm there if it changes.
    assert len(nonempty) == 7
    assert content[0].strip() == '#begin document (doc1); part 1'
63
|
|
|
|