Passed
Pull Request — master (#1184)
by Konstantin
03:15
created

tests.model.test_ocrd_page   B

Complexity

Total Complexity 46

Size/Duplication

Total Lines 468
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 46
eloc 250
dl 0
loc 468
rs 8.72
c 0
b 0
f 0

27 Functions

Rating   Name   Duplication   Size   Complexity  
A test_extend_all_indexed_no_validation() 0 15 2
A test_delete_region() 0 9 1
A test_hashable() 0 9 1
A test_faulty_glyphs_to_xml() 0 6 1
A test_alternative_image_additions() 0 25 1
A test_empty_groups_to_regionrefindexed() 0 22 2
A test_extend_all_indexed_validate_continuity() 0 14 3
A test_serialize_no_empty_readingorder() 0 9 1
A test_get_all_text_lines() 0 6 2
A test_parse_string_succeeds() 0 3 1
A test_get_all_indexed_index_sort() 0 16 2
A test_pcgts_id_matches() 0 2 1
A test_simple_types() 0 13 1
A _fixture_faulty_glyphs() 0 6 2
A test_get_AllAlternativeImages() 0 13 2
A test_to_xml_unicode_nsprefix() 0 14 2
A test_orderedgroup_export_order() 0 29 2
A test_id() 0 13 1
A test_get_all_alternative_image_paths() 0 17 2
A test_issue_269() 0 10 1
A test_set_image_filename() 0 8 1
A test_all_regions_with_reading_order() 0 20 2
A test_get_all_indexed_classes() 0 12 2
A test_all_regions_without_reading_order() 0 22 2
A test_get_unordered_group_children() 0 10 2
A test_get_all_regions_invalid_order_raises_exception() 0 11 3
A test_get_all_regions_invalid_depth_raises_exception() 0 11 3

How to fix   Complexity   

Complexity

Complex classes like tests.model.test_ocrd_page often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
import pytest
4
5
from tests.base import main, assets, create_ocrd_file_with_defaults
6
7
from ocrd_modelfactory import page_from_image
8
from ocrd_models.ocrd_page_generateds import TextTypeSimpleType
9
from ocrd_models.ocrd_page import (
10
    AlternativeImageType,
11
    PcGtsType,
12
    PageType,
13
    TextRegionType,
14
    TextLineType,
15
    OrderedGroupIndexedType,
16
    UnorderedGroupIndexedType,
17
    ReadingOrderType,
18
    RegionRefIndexedType,
19
    WordType,
20
    GlyphType,
21
22
    parseString,
23
    parse,
24
    to_xml
25
)
26
27
# Inline PAGE-XML document used by test_parse_string_succeeds and
# test_delete_region: a single Page holding one heading TextRegion with one
# TextLine and one Word whose TextEquiv carries a @conf attribute.
simple_page = """\
28
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd">
29
    <Metadata>
30
        <Creator>OCR-D</Creator>
31
        <Created>2016-09-20T11:09:27.041+02:00</Created>
32
        <LastChange>2018-04-25T17:44:49.605+01:00</LastChange>
33
    </Metadata>
34
    <Page
35
        imageFilename="https://github.com/OCR-D/assets/raw/master/data/kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif"
36
        imageWidth="1457"
37
        imageHeight="2083"
38
        type="content">
39
        <TextRegion type="heading" id="r_1_1" custom="readingOrder {index:0;} structure {type:heading;}">
40
            <Coords points="113,365 919,365 919,439 113,439"/>
41
            <TextLine id="tl_1" primaryLanguage="German" custom="readingOrder {index:0;} textStyle {offset:0; length:26;fontFamily:Arial; fontSize:17.0; bold:true;}">
42
                <Coords points="114,366 918,366 918,438 114,438"/>
43
                <Baseline points="114,429 918,429"/>
44
                <Word id="w_w1aab1b1b2b1b1ab1" language="German" custom="readingOrder {index:0;} textStyle {offset:0; length:11;fontFamily:Arial; fontSize:17.0; bold:true;}">
45
                    <Coords points="114,368 442,368 442,437 114,437"/>
46
                    <TextEquiv conf="1">
47
                        <Unicode>Berliniſche</Unicode>
48
                    </TextEquiv>
49
                </Word>
50
            </TextLine>
51
        </TextRegion>
52
    </Page>
53
</PcGts>
54
"""
55
56
57
@pytest.fixture(name='faulty_glyphs')
def _fixture_faulty_glyphs():
    """Yield the parsed ``FAULTY_GLYPHS.xml`` asset as a PcGts object.

    The file is read as raw bytes and handed to ``parseString`` with
    ``silence=True``.
    """
    fixture_path = assets.path_to('glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml')
    with open(fixture_path, 'rb') as handle:
        raw_xml = handle.read()
    yield parseString(raw_xml, silence=True)
63
64
65
def test_pcgts_id_matches(faulty_glyphs):
    """The ``pcGtsId`` attribute of the parsed fixture has the expected value."""
    expected_id = 'FAULTY_GLYPHS_FILE'
    assert faulty_glyphs.pcGtsId == expected_id
67
68
69
def test_faulty_glyphs_to_xml(faulty_glyphs):
    """Serialising with ``to_xml`` emits the namespace declarations and ``pc:`` prefixes."""
    as_xml = to_xml(faulty_glyphs)
    head = as_xml[:1000]

    # Namespace declarations are expected near the start of the document.
    assert ' xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"' in head
    # This used to be ``assert '<literal>', as_xml[:1000]``: the comma turned
    # the slice into the assertion *message*, so the non-empty literal always
    # passed. Use ``in`` so the declaration is actually checked.
    assert ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd"' in head
    assert '<pc:PcGts' in as_xml[0:100]
    assert '<pc:TextRegion' in as_xml[1000:3000]
75
76
77
def test_to_xml_unicode_nsprefix():
    """see https://github.com/OCR-D/core/pull/474#issuecomment-621477590"""

    # arrange
    source_path = assets.path_to('kant_aufklaerung_1784-binarized/data/OCR-D-GT-WORD/INPUT_0020.xml')
    with open(source_path, 'rb') as f:
        from_xml = f.read()
    source_text = from_xml.decode('utf-8')

    # assert: the input file uses un-prefixed element names ...
    assert '<Unicode>' in source_text, 'without NS prefix'
    assert '<Created' in source_text, 'without NS prefix'

    # ... while the re-serialised document carries the ``pc:`` prefix
    as_xml = to_xml(parseString(from_xml, silence=True))
    assert '<pc:Unicode>' in as_xml, 'with NS prefix'
    assert '<pc:Created>' in as_xml, 'with NS prefix'
91
92
93
def test_issue_269(faulty_glyphs):
    """
    @conf is parsed as str but should be float
    https://github.com/OCR-D/core/issues/269

    Checks that ``get_conf`` hands back a value of the same type that was
    passed to ``set_conf``.
    """
    # First TextEquiv of the first TextRegion; the accessors return the same
    # object on every call, so it is fetched once.
    text_equiv = faulty_glyphs.get_Page().get_TextRegion()[0].get_TextEquiv()[0]

    # GIGO (garbage in, garbage out): the type stored is the type returned
    text_equiv.set_conf(1.0)
    assert isinstance(text_equiv.get_conf(), float)
    text_equiv.set_conf('1.0')
    assert isinstance(text_equiv.get_conf(), str)
103
104
105
def test_parse_string_succeeds():
    """parseString with @conf in TextEquiv won't throw an error"""
    parsed = parseString(simple_page, silence=True)
    assert parsed is not None
108
109
110
def test_delete_region():
    """Deleting from the list returned by ``get_TextRegion`` removes the region from the page."""
    # arrange
    page = parseString(simple_page, silence=True).get_Page()
    assert len(page.get_TextRegion()) == 1

    # act
    regions = page.get_TextRegion()
    del regions[0]

    # assert: fetch the list again to make sure the page itself was changed
    assert len(page.get_TextRegion()) == 0
119
120
121
def test_set_image_filename(faulty_glyphs):
    """``imageFilename`` on the page can be read and reassigned as a plain attribute."""
    page = faulty_glyphs.get_Page()
    assert page.imageFilename == '00000259.sw.tif'

    # act
    page.imageFilename = 'foo'

    # assert: read back through a fresh ``get_Page()`` call
    assert faulty_glyphs.get_Page().imageFilename == 'foo'
129
130
131
def test_alternative_image_additions():
    """An AlternativeImage can be added at page, region, line, word and glyph level."""
    pcgts = PcGtsType(pcGtsId="foo")
    assert pcgts.pcGtsId == 'foo'

    # arrange: build the hierarchy Page > TextRegion > TextLine > Word > Glyph
    page = PageType()
    region = TextRegionType()
    line = TextLineType()
    word = WordType()
    glyph = GlyphType()
    pcgts.set_Page(page)
    page.add_TextRegion(region)
    region.add_TextLine(line)
    line.add_Word(word)
    word.add_Glyph(glyph)

    # act: attach a fresh AlternativeImage at every level
    for element in (page, region, line, word, glyph):
        element.add_AlternativeImage(AlternativeImageType())

    # TODO assertions
158
159
160
def test_simple_types(faulty_glyphs):
    """Simple-type constants are plain strings and compare equal to their literal values."""
    first_region = faulty_glyphs.get_Page().get_TextRegion()[0]

    # assert: the constant itself is a str
    assert isinstance(TextTypeSimpleType.CREDIT, str)

    # assert: the parsed @type is a str, equal to both the constant and the literal
    region_type = first_region.get_type()
    assert isinstance(region_type, str)
    assert region_type == TextTypeSimpleType.CREDIT
    assert region_type == 'credit'

    # act: assigning another constant stores its string value
    first_region.set_type(TextTypeSimpleType.PAGENUMBER)
    assert first_region.get_type() == 'page-number'
    assert isinstance(first_region.get_type(), str)
173
174
175
def test_orderedgroup_export_order():
    """
    Children of an OrderedGroup are returned and serialised ordered by @index.

    See https://github.com/OCR-D/core/issues/475
    """
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)

    # act
    og = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()
    xml_before = to_xml(og)
    children = og.get_AllIndexed()

    # assert
    assert len(children) == 22
    assert [c.index for c in children] == list(range(0, 22))
    # mix up the indexes
    children[0].index = 11
    children[11].index = 3
    children[3].index = 0
    assert [c.index for c in children] == [11, 1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
    # a fresh call returns the children ordered by their (changed) index
    assert [c.index for c in og.get_AllIndexed()] == list(range(0, 22))
    assert isinstance(og.get_AllIndexed()[1], OrderedGroupIndexedType)
    # serialize and make sure the correct order was serialized
    new_pcgts = parseString(to_xml(pcgts).encode('utf8'), silence=True)
    new_og = new_pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()
    assert [c.index for c in new_og.get_AllIndexed()] == list(range(0, 22))

    xml_after = to_xml(new_og)
    # TODO why not working?
    #assert xml_after == xml_before
206
207
208
def test_empty_groups_to_regionrefindexed():
    """
    Empty indexed groups come back as RegionRefIndexed after a serialise/parse round trip.

    Corollary, see https://github.com/OCR-D/core/issues/475
    """
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)

    og = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()
    children = og.get_AllIndexed()

    # assert
    assert isinstance(children[1], OrderedGroupIndexedType)
    assert isinstance(children[21], UnorderedGroupIndexedType)
    # empty all the elements in the first OrderedGroupIndexed
    children[1].set_RegionRefIndexed([])
    # serialize and parse to see the empty group converted
    pcgts = parseString(to_xml(pcgts).encode('utf8'), silence=True)
    og = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()
    children = og.get_AllIndexed()
    assert isinstance(children[1], RegionRefIndexedType)
    assert isinstance(children[21], RegionRefIndexedType)
230
231
232
def test_all_regions_without_reading_order():
    """
    Region counts from ``get_AllRegions`` when no ``order`` argument is given.

    https://github.com/OCR-D/core/pull/479
    https://github.com/OCR-D/core/issues/240#issuecomment-493135797
    """
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)

    # act
    pg = pcgts.get_Page()

    # assert: (keyword arguments, expected number of regions)
    expectations = [
        ({}, 65),
        ({'depth': 0}, 65),
        ({'depth': 1}, 45),
        ({'depth': 2}, 65),
        ({'depth': 3}, 65),
        ({'classes': ['Separator']}, 25),
        ({'classes': ['Table']}, 3),
        ({'classes': ['Text']}, 37),
        ({'classes': ['Text'], 'depth': 1}, 17),
        ({'classes': ['Text'], 'depth': 2}, 37),
    ]
    for kwargs, expected in expectations:
        # the kwargs serve as failure message to identify the failing case
        assert len(pg.get_AllRegions(**kwargs)) == expected, kwargs
254
255
256
def test_get_all_regions_invalid_order_raises_exception():
    """``get_AllRegions`` rejects an unknown ``order`` value with a descriptive message."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pg = parseString(f.read(), silence=True).get_Page()

    # act
    with pytest.raises(Exception) as exc:
        pg.get_AllRegions(order='random')

    # assert
    assert "Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not 'random'" in str(exc.value)
267
268
269
def test_get_all_regions_invalid_depth_raises_exception():
    """``get_AllRegions`` rejects a negative ``depth`` with a descriptive message."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pg = parseString(f.read(), silence=True).get_Page()

    # act
    with pytest.raises(Exception) as exc:
        pg.get_AllRegions(depth=-1)

    # assert
    assert "Argument 'depth' must be an integer greater-or-equal 0, not '-1'" in str(exc.value)
280
281
282
def test_all_regions_with_reading_order():
    """
    Region counts from ``get_AllRegions`` for the reading-order based ``order`` values.

    https://github.com/OCR-D/core/pull/479
    https://github.com/OCR-D/core/issues/240#issuecomment-493135797
    """

    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pg = parseString(f.read(), silence=True).get_Page()

    # assert: (keyword arguments, expected number of regions)
    expectations = [
        ({'order': 'reading-order-only'}, 40),
        ({'order': 'reading-order-only', 'depth': 1}, 20),
        ({'order': 'reading-order-only', 'depth': 2}, 40),
        ({'order': 'reading-order', 'depth': 0}, 65),
        ({'order': 'reading-order', 'depth': 1}, 45),
        ({'order': 'reading-order', 'depth': 2}, 65),
        ({'classes': ['Table'], 'order': 'reading-order'}, 3),
        ({'classes': ['Text'], 'order': 'reading-order'}, 37),
        ({'classes': ['Text'], 'order': 'reading-order', 'depth': 1}, 17),
    ]
    for kwargs, expected in expectations:
        # the kwargs serve as failure message to identify the failing case
        assert len(pg.get_AllRegions(**kwargs)) == expected, kwargs
302
303
304
def test_get_unordered_group_children():
    """The first UnorderedGroupIndexed of the reading order reports exactly one child."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)

    # act
    ug = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup().get_UnorderedGroupIndexed()[0]

    # assert
    assert len(ug.get_UnorderedGroupChildren()) == 1
314
315
316
def test_get_all_indexed_classes():
    """``get_AllIndexed(classes=...)`` restricts the result to the requested element kinds."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)

    # act
    og = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()

    # assert
    assert len(og.get_AllIndexed(classes=['RegionRef'])) == 17
    assert len(og.get_AllIndexed(classes=['OrderedGroup'])) == 3
    assert len(og.get_AllIndexed(classes=['UnorderedGroup'])) == 2
328
329
330
def test_get_all_indexed_index_sort():
    """``index_sort`` controls ordering by @index; ``sort_AllIndexed`` sorts the stored children."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        og = parseString(f.read(), silence=True).get_Page().get_ReadingOrder().get_OrderedGroup()

    # act
    unogs = og.get_UnorderedGroupIndexed()

    # assert
    assert [x.index for x in unogs] == [20, 21]
    # swap the two indexes so that stored order and index order disagree
    unogs[0].index = 21
    unogs[1].index = 20
    assert [x.index for x in og.get_AllIndexed(classes=['UnorderedGroup'], index_sort=True)] == [20, 21]
    assert [x.index for x in og.get_AllIndexed(classes=['UnorderedGroup'], index_sort=False)] == [21, 20]
    # after sorting, the unsorted view is in index order as well
    og.sort_AllIndexed()
    assert [x.index for x in og.get_AllIndexed(classes=['UnorderedGroup'], index_sort=False)] == [20, 21]
346
347
348
def test_extend_all_indexed_no_validation():
    """Without validation, ``extend_AllIndexed`` appends the elements and renumbers them."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        og = parseString(f.read(), silence=True).get_Page().get_ReadingOrder().get_OrderedGroup()

    # act
    og.extend_AllIndexed([
        RegionRefIndexedType(index=3, id='r3'),
        RegionRefIndexedType(index=2, id='r2'),
        RegionRefIndexedType(index=1, id='r1'),
    ])
    rrs = og.get_RegionRefIndexed()

    # assert: the three new refs carry indexes 22..24 instead of their
    # (already used) original ones
    assert [x.index for x in rrs][-3:] == [22, 23, 24]
363
364
365
def test_get_all_text_lines():
    """``get_AllTextLines`` returns every text line of the page."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        page = parseString(f.read(), silence=True).get_Page()

    # assert
    assert len(page.get_AllTextLines()) == 55
371
372
373
def test_extend_all_indexed_validate_continuity():
    """With ``validate_continuity=True``, re-using an existing @index raises an error."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml'), 'rb') as f:
        og = parseString(f.read(), silence=True).get_Page().get_ReadingOrder().get_OrderedGroup()

    # act
    with pytest.raises(Exception) as index_exc:
        og.extend_AllIndexed([
            RegionRefIndexedType(index=3, id='r3'),
            RegionRefIndexedType(index=2, id='r2'),
            RegionRefIndexedType(index=1, id='r1'),
        ], validate_continuity=True)

    # assert
    assert "@index already used: 1" in str(index_exc.value)
387
388
389
def test_get_all_alternative_image_paths():
    """``get_AllAlternativeImagePaths`` collects image paths for the selected hierarchy levels."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding
    with open(assets.path_to('kant_aufklaerung_1784-complex/data/OCR-D-OCR-OCRO-fraktur-SEG-LINE-tesseract-ocropy-DEWARP/OCR-D-OCR-OCRO-fraktur-SEG-LINE-tesseract-ocropy-DEWARP_0001.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)

    # assert
    assert pcgts.get_AllAlternativeImagePaths(page=False, region=False, line=False) == []
    assert pcgts.get_AllAlternativeImagePaths(page=True, region=False, line=False) == [
        'OCR-D-IMG-BIN/OCR-D-IMG-BINPAGE-sauvola_0001-BIN_sauvola-ms-split.png',
        'OCR-D-IMG-CROP/OCR-D-IMG-CROP_0001.png',
        'OCR-D-IMG-BIN/INPUT_0017-BIN_sauvola-ms-split.png',
        'OCR-D-IMG-DESPECK/OCR-D-IMG-DESPECK_0001.png',
        'OCR-D-IMG-DESKEW/OCR-D-IMG-DESKEW_0001.png',
        'OCR-D-IMG-DESKEW/OCR-D-IMG-DESKEW_0001.png']
    # NOTE(review): no case covers region- or line-level images without
    # page-level ones - consider adding one.
    assert len(pcgts.get_AllAlternativeImagePaths(page=True, region=True, line=False)) == 12
    assert len(pcgts.get_AllAlternativeImagePaths(page=True, region=True, line=True)) == 36

    # TODO: Test with word/glyph-level AlternativeImages
    # would work with len == 36
    # assert len(pcgts.get_AllAlternativeImagePaths(word=False)) == 37
410
411
412
def test_get_AllAlternativeImages():
    """``get_AllAlternativeImages`` returns AlternativeImageType objects for the selected levels."""
    # arrange: read raw bytes so parsing does not depend on the locale's
    # default text encoding; the file is closed before any assertion runs
    with open(assets.path_to('kant_aufklaerung_1784-complex/data/OCR-D-OCR-OCRO-fraktur-SEG-LINE-tesseract-ocropy-DEWARP/OCR-D-OCR-OCRO-fraktur-SEG-LINE-tesseract-ocropy-DEWARP_0001.xml'), 'rb') as f:
        pcgts = parseString(f.read(), silence=True)
    page = pcgts.get_Page()

    # assert
    assert page.get_AllAlternativeImages(page=False, region=False, line=False) == []
    assert [x.filename for x in page.get_AllAlternativeImages(page=True, region=False, line=False)] == [
        'OCR-D-IMG-BIN/OCR-D-IMG-BINPAGE-sauvola_0001-BIN_sauvola-ms-split.png',
        'OCR-D-IMG-CROP/OCR-D-IMG-CROP_0001.png',
        'OCR-D-IMG-BIN/INPUT_0017-BIN_sauvola-ms-split.png',
        'OCR-D-IMG-DESPECK/OCR-D-IMG-DESPECK_0001.png',
        'OCR-D-IMG-DESKEW/OCR-D-IMG-DESKEW_0001.png',
        'OCR-D-IMG-DESKEW/OCR-D-IMG-DESKEW_0001.png']
    assert isinstance(page.get_AllAlternativeImages()[0], AlternativeImageType)
425
426
427
def test_serialize_no_empty_readingorder():
    """
    An empty ReadingOrder is no longer present after serialising and re-parsing.

    https://github.com/OCR-D/core/issues/602
    """
    # arrange: a fresh page for the image, with an empty ReadingOrder attached
    image_path = assets.path_to('kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif')
    ocrd_file = create_ocrd_file_with_defaults(local_filename=image_path)
    pcgts = page_from_image(ocrd_file)
    pcgts.get_Page().set_ReadingOrder(ReadingOrderType())
    assert pcgts.get_Page().get_ReadingOrder()

    # act: round trip through XML
    serialized = to_xml(pcgts, skip_declaration=True)
    reparsed = parseString(serialized)

    # assert
    assert not reparsed.get_Page().get_ReadingOrder()
436
437
438
def test_hashable():
    """
    PcGts and Page objects can be stored in a set.

    https://github.com/OCR-D/ocrd_segment/issues/45
    """
    image_path = assets.path_to('kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif')
    ocrd_file = create_ocrd_file_with_defaults(local_filename=image_path)
    pcgts = page_from_image(ocrd_file)
    page = pcgts.get_Page()

    # adding to a set hashes the object; an unhashable one would raise here
    testset = set()
    for element in (pcgts, page):
        testset.add(element)

    # TODO: what is actually to be asserted?
449
450
451
def test_id():
    """
    The ``id`` property of the PcGts root and of its Page element.

    https://github.com/OCR-D/core/issues/682
    """
    page_xml_path = assets.path_to('kant_aufklaerung_1784/data/OCR-D-GT-PAGE/PAGE_0017_PAGE.xml')
    pcgts = parse(page_xml_path)
    page = pcgts.get_Page()

    # assert
    assert pcgts.id == 'PAGE_0017_PAGE'

    # TODO: is this *really* desired?
    # One would expect the ID of a single Page element to resemble that of
    # the top-level PcGts container rather than a file name.
    assert page.id == 'OCR-D-IMG/INPUT_0017.tif'
464
465
466
# Script entry point: hand this file's path to the shared test runner helper.
if __name__ == '__main__':
    main(__file__)
468