Passed
Pull Request — master (#1184)
by Konstantin
03:15
created

tests.test_utils   F

Complexity

Total Complexity 60

Size/Duplication

Total Lines 344
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 60
eloc 251
dl 0
loc 344
rs 3.6
c 0
b 0
f 0

40 Functions

Rating   Name   Duplication   Size   Complexity  
A test_xywh_from_points() 0 2 1
A test_safe_filename() 0 4 1
A test_parameter_file_comments() 0 11 2
A test_remove_non_path_from_url() 0 4 1
A test_local_filename() 0 7 1
A test_xywh_from_points_unordered() 0 2 1
A test_is_string() 0 3 1
A test_partition_list() 0 20 3
B test_generate_range() 0 12 6
A test_set_json_key_value_overrides() 0 9 1
A test_parameter_file() 0 11 2
A test_assert_file_grp_cardinality() 0 8 4
A test_concat_padded() 0 4 1
A test_parse_json_string_or_file() 0 9 1
A test_make_file_id_simple() 0 3 1
A test_bbox_from_xywh() 0 2 1
A test_pushd_popd_newcwd() 0 7 2
A test_xmllint() 0 4 1
A test_bbox_from_points() 0 2 1
A test_points_from_xywh() 0 2 1
A test_make_file_id_744() 0 9 1
A test_pil_version() 0 18 2
A test_abspath() 0 2 1
A test_parameters_invalid() 0 5 3
A test_make_file_id_mets() 0 10 2
A test_pushd_popd_tempdir() 0 9 2
A test_make_file_id_605() 0 11 1
A test_mime_ext() 0 5 1
A test_polygon_from_points() 0 2 1
A test_points_from_x0y0x1y1() 0 2 1
A test_points_from_polygon() 0 2 1
A test_sparkline() 0 5 1
A test_make_file_id_570() 0 6 1
A test_polygon_from_x0y0x1y1() 0 2 1
A test_pushd_popd_bad_call() 0 4 3
A test_nth_url_segment() 0 12 1
A test_is_local_filename() 0 6 1
A test_membername() 0 6 1
A test_points_from_bbox() 0 2 1
A test_xywh_from_polygon() 0 2 1

How to fix   Complexity   

Complexity

Complex classes like tests.test_utils often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from os import getcwd
2
from tempfile import TemporaryDirectory, gettempdir
3
from pathlib import Path
4
5
from PIL import Image
6
7
from tests.base import TestCase, main, assets, create_ocrd_file
8
from pytest import raises, warns
9
from ocrd_utils import (
10
    abspath,
11
12
    assert_file_grp_cardinality,
13
    make_file_id,
14
15
    bbox_from_points,
16
    bbox_from_xywh,
17
18
    concat_padded,
19
    is_local_filename,
20
    get_local_filename,
21
    is_string,
22
    membername,
23
    generate_range,
24
    sparkline,
25
26
    nth_url_segment,
27
    remove_non_path_from_url,
28
    safe_filename,
29
30
    parse_json_string_or_file,
31
    set_json_key_value_overrides,
32
33
    partition_list,
34
35
    points_from_bbox,
36
    points_from_x0y0x1y1,
37
    points_from_xywh,
38
    points_from_polygon,
39
40
    polygon_from_points,
41
    polygon_from_x0y0x1y1,
42
43
    xywh_from_points,
44
    xywh_from_polygon,
45
    pushd_popd,
46
47
    MIME_TO_EXT, EXT_TO_MIME,
48
    MIME_TO_PIL, PIL_TO_MIME,
49
)
50
from ocrd_models.utils import xmllint_format
51
from ocrd_models import OcrdMets
52
53
54
def test_abspath():
    """A bare ``file:///`` URL is expected to map to the filesystem root."""
    resolved = abspath('file:///')
    assert resolved == '/'
56
57
def test_points_from_xywh():
    """An x/y/w/h dict is rendered as a four-corner points string."""
    box = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
    assert points_from_xywh(box) == '100,100 200,100 200,200 100,200'
59
60
def test_points_from_bbox():
    """Four bounding-box coordinates are rendered as a four-corner points string."""
    expected = '100,100 200,100 200,200 100,200'
    assert points_from_bbox(100, 100, 200, 200) == expected
62
63
def test_points_from_polygon():
    """A list of [x, y] pairs is serialized as a space-separated points string."""
    polygon = [[100, 100], [200, 100], [200, 200], [100, 200]]
    assert points_from_polygon(polygon) == '100,100 200,100 200,200 100,200'
65
66
def test_polygon_from_x0y0x1y1():
    """An [x0, y0, x1, y1] list expands to the four corner coordinate pairs."""
    corners = polygon_from_x0y0x1y1([100, 100, 200, 200])
    assert corners == [[100, 100], [200, 100], [200, 200], [100, 200]]
68
69
def test_points_from_x0y0x1y1():
    """An [x0, y0, x1, y1] list is rendered as a four-corner points string."""
    rendered = points_from_x0y0x1y1([100, 100, 200, 200])
    assert rendered == '100,100 200,100 200,200 100,200'
71
72
def test_bbox_from_points():
    """A points string is reduced to its (x0, y0, x1, y1) bounding box."""
    points = '100,100 200,100 200,200 100,200'
    assert bbox_from_points(points) == (100, 100, 200, 200)
74
75
def test_bbox_from_xywh():
    """An x/y/w/h dict is converted to an (x0, y0, x1, y1) tuple."""
    box = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
    assert bbox_from_xywh(box) == (100, 100, 200, 200)
77
78
def test_xywh_from_polygon():
    """A polygon given as [x, y] pairs is converted to its x/y/w/h dict."""
    polygon = [[100, 100], [200, 100], [200, 200], [100, 200]]
    expected = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
    assert xywh_from_polygon(polygon) == expected
80
81
def test_xywh_from_points():
    """A points string is converted to its x/y/w/h dict."""
    expected = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
    assert xywh_from_points('100,100 200,100 200,200 100,200') == expected
83
84
def test_xywh_from_points_unordered():
    """The x/y/w/h box covers all points even when the outermost point comes first."""
    points = '500,500 100,100 200,100 200,200 100,200'
    expected = {'x': 100, 'y': 100, 'w': 400, 'h': 400}
    assert xywh_from_points(points) == expected
86
87
def test_polygon_from_points():
    """A points string is parsed into a list of [x, y] pairs."""
    parsed = polygon_from_points('100,100 200,100 200,200 100,200')
    assert parsed == [[100, 100], [200, 100], [200, 200], [100, 200]]
89
90
def test_concat_padded():
    """Check the joined result for integer-only and mixed integer/string parts."""
    cases = [
        (('x', 1), 'x_0001'),
        (('x', 1, 2, 3), 'x_0001_0002_0003'),
        (('x', 1, '2', 3), 'x_0001_2_0003'),
    ]
    for args, expected in cases:
        assert concat_padded(*args) == expected
94
95
def test_is_string():
    """Both plain and ``u``-prefixed string literals are recognised as strings."""
    for candidate in ('x', u'x'):
        assert is_string(candidate)
98
99
def test_xmllint():
    """Formatting already-indented XML yields the same text behind an XML declaration."""
    source = '<beep>\n  <boop>42</boop>\n</beep>\n'
    expected = '<?xml version="1.0" encoding="UTF-8"?>\n' + source
    formatted = xmllint_format(source)
    # The formatter returns bytes, so decode before comparing with the str fixture.
    assert formatted.decode('utf-8') == expected
103
104
def test_membername():
    """Looking up the value 42 on an instance yields the attribute name holding it."""
    class Holder:
        def __init__(self):
            self.prop = 42

    assert membername(Holder(), 42) == 'prop'
110
111
def test_pil_version():
    """
    Test segfault issue in PIL TiffImagePlugin

    Run the same code multiple times to make segfaults more probable

    Test is failing due to segfaults in Pillow versions:
        6.0.0
        6.1.0

    Test succeeds in Pillow versions:
        5.3.1
        5.4.1
        6.2.0
    """
    # The asset path does not change between iterations, so resolve it once.
    image_path = assets.path_to('grenzboten-test/data/OCR-D-IMG-BIN/p179470.tif')
    for _ in range(10):
        # Use the image as a context manager so every iteration releases its
        # file handle instead of leaving ten open files to the garbage collector.
        with Image.open(image_path) as pil_image:
            pil_image.crop(box=[1539, 202, 1626, 271])
129
130
def test_pushd_popd_newcwd():
    """Inside ``pushd_popd(path)`` the cwd is ``path``; afterwards the previous cwd is restored."""
    original_cwd = getcwd()
    # Compare against the resolved temp dir; presumably getcwd() reports the
    # symlink-free path on platforms where the temp dir is a symlink.
    target_dir = Path(gettempdir()).resolve()
    with pushd_popd(target_dir):
        assert getcwd() == str(target_dir)
    # A single check after the block suffices (the original repeated it verbatim).
    assert getcwd() == original_cwd
137
138
def test_pushd_popd_tempdir():
    """With ``tempdir=True`` the cwd becomes the yielded directory, located under the system temp dir."""
    original_cwd = getcwd()
    temp_root = str(Path(gettempdir()).resolve())
    with pushd_popd(tempdir=True) as newcwd:
        newcwd_str = str(newcwd)
        assert getcwd() == newcwd_str
        assert newcwd_str.startswith(temp_root)
    # A single check after the block suffices (the original repeated it verbatim).
    assert getcwd() == original_cwd
147
148
def test_pushd_popd_bad_call():
    """Supplying both a target directory and the tempdir flag must raise."""
    expected_message = 'pushd_popd can accept either newcwd or tempdir, not both'
    with raises(Exception, match=expected_message), pushd_popd('/foo/bar', True):
        pass
152
153
def test_is_local_filename():
    """Plain paths and ``file:`` URLs are local; an unknown URL scheme is not."""
    local_candidates = (
        '/foo/bar',
        'file:///foo/bar',
        'file:/foo/bar',
        'foo/bar',
    )
    for candidate in local_candidates:
        assert is_local_filename(candidate)
    assert not is_local_filename('bad-scheme://foo/bar')
159
160
def test_local_filename():
    """Check local-path extraction with and without a second (prefix) argument."""
    cases = [
        (('/foo/bar',), '/foo/bar'),
        (('file:///foo/bar',), '/foo/bar'),
        (('file:/foo/bar',), '/foo/bar'),
        (('/foo/bar', '/foo/'), 'bar'),
        (('/foo/bar', '/foo'), 'bar'),
        (('foo/bar', 'foo'), 'bar'),
    ]
    for args, expected in cases:
        assert get_local_filename(*args) == expected
167
168
def test_remove_non_path_from_url():
    """Query, fragment and trailing slashes are dropped; a clean URL is unchanged."""
    cases = {
        'https://foo/bar': 'https://foo/bar',
        'https://foo//?bar#frag': 'https://foo',
        '/path/to/foo#frag': '/path/to/foo',
    }
    for url, expected in cases.items():
        assert remove_non_path_from_url(url) == expected
172
173
def test_nth_url_segment():
    """Check the default (no ``n``) and explicit negative ``n`` segment lookups."""
    cases = [
        ('', {}, ''),
        ('foo', {}, 'foo'),
        ('foo', {'n': -1}, 'foo'),
        ('foo', {'n': -2}, ''),
        ('foo/bar', {'n': -2}, 'foo'),
        ('/baz/bar', {'n': -2}, 'baz'),
        ('foo/', {}, 'foo'),
        ('foo//?bar#frag', {}, 'foo'),
        ('/path/to/foo#frag', {}, 'foo'),
        ('/path/to/foo#frag', {'n': -2}, 'to'),
        ('https://server/foo?xyz=zyx', {}, 'foo'),
    ]
    for url, kwargs, expected in cases:
        assert nth_url_segment(url, **kwargs) == expected
185
186
def test_parse_json_string_or_file():
    """Missing, blank and empty-object inputs give ``{}``; JSON object strings are parsed."""
    assert parse_json_string_or_file() == {}
    for empty_input in ('', ' ', '{}'):
        assert parse_json_string_or_file(empty_input) == {}
    assert parse_json_string_or_file('{"foo": 32}') == {'foo': 32}
    raw = '{"dpi": -1, "textequiv_level": "word", "overwrite_words": false, "raw_lines": false, "char_whitelist": "", "char_blacklist": "", "char_unblacklist": ""}'
    expected = {"dpi": -1, "textequiv_level": "word", "overwrite_words": False, "raw_lines": False, "char_whitelist": "", "char_blacklist": "", "char_unblacklist": ""}
    assert parse_json_string_or_file(raw) == expected
195
196
def test_parameter_file():
    """
    Verify that an existing filename takes priority over interpreting the argument as a JSON string
    """
    with TemporaryDirectory() as workdir:
        # Deliberately odd: the file's *name* is itself a valid JSON document.
        json_named_file = Path(workdir, '{"foo":23}')
        json_named_file.write_text('{"bar": 42}')
        # The full path exists on disk, so the file's content is parsed.
        from_file = parse_json_string_or_file(str(json_named_file))
        assert from_file == {'bar': 42}
        # The bare name does not exist relative to $PWD, so the string itself is parsed.
        from_string = parse_json_string_or_file(json_named_file.name)
        assert from_string == {'foo': 23}
207
208
def test_parameter_file_comments():
    """A parameter file containing ``#`` comment lines still parses to the plain object."""
    commented_json = """\
                {
                    # Metasyntactical variables are rarely imaginative
                    "foo": 42,
                    # case in point:
                    "bar": 23
                }"""
    with TemporaryDirectory() as workdir:
        json_file = Path(workdir, 'test.json')
        json_file.write_text(commented_json)
        parsed = parse_json_string_or_file(str(json_file))
        assert parsed == {'foo': 42, 'bar': 23}
219
220
def test_parameters_invalid():
    """A JSON array and malformed JSON both raise ``ValueError`` with distinct messages."""
    failing_inputs = (
        ('[]', 'Not a valid JSON object'),
        ('[}', 'Error parsing'),
    )
    for bad_input, message in failing_inputs:
        with raises(ValueError, match=message):
            parse_json_string_or_file(bad_input)
225
226
def test_mime_ext():
    """The JPEG 2000 entries of the four lookup tables agree with each other."""
    mime, extension, pil_format = 'image/jp2', '.jp2', 'JP2'
    assert MIME_TO_EXT[mime] == extension
    assert EXT_TO_MIME[extension] == mime
    assert MIME_TO_PIL[mime] == pil_format
    assert PIL_TO_MIME[pil_format] == mime
231
232
233
def test_set_json_key_value_overrides():
    """Override values are parsed as JSON where possible, else kept as the raw string."""
    cases = [
        ('true', True),
        ('false', False),
        ('42', 42),
        ('42.3', 42.3),
        ('["one", 2, 3.33]', ['one', 2, 3.33]),
        ('{"one": 2}', {'one': 2}),
        ('"a string"', 'a string'),
        ('a string', 'a string'),
    ]
    for raw_value, parsed_value in cases:
        result = set_json_key_value_overrides({}, ('foo', raw_value))
        assert result == {'foo': parsed_value}
242
243
def test_assert_file_grp_cardinality():
    """A wrong group count raises ``AssertionError``; the matching count passes silently."""
    two_groups = 'FOO,BAR'
    too_many_expected = "Expected exactly 5 output file groups, but '.'FOO', 'BAR'.' has 2"
    with raises(AssertionError, match=too_many_expected):
        assert_file_grp_cardinality(two_groups, 5)
    too_few_expected = "Expected exactly 1 output file group, but '.'FOO', 'BAR'.' has 2"
    with raises(AssertionError, match=too_few_expected):
        assert_file_grp_cardinality(two_groups, 1)
    # The matching cardinality must not raise.
    assert_file_grp_cardinality(two_groups, 2)
    # The optional third argument shows up in the error message.
    with_message = "Expected exactly 1 output file group .foo bar., but '.'FOO', 'BAR'.' has 2"
    with raises(AssertionError, match=with_message):
        assert_file_grp_cardinality(two_groups, 1, 'foo bar')
251
252
def test_make_file_id_simple():
    """The numeric suffix of the source ID is carried over to the new file group."""
    source_file = create_ocrd_file('MAX', ID="MAX_0012")
    new_id = make_file_id(source_file, 'FOO')
    assert new_id == 'FOO_0012'
255
256
def test_make_file_id_mets():
    """Exercise ``make_file_id`` against files registered in an in-memory METS."""
    mets = OcrdMets.empty_mets()
    # Register FOO_0001..FOO_0009 and BAR_0001..BAR_0009, alternating FOO/BAR per number.
    for number in range(1, 10):
        for group in ('FOO', 'BAR'):
            padded = f'{group}_{number:04d}'
            mets.add_file(group, ID=padded, mimetype="image/tiff", pageId=padded)
    bar_seven = mets.find_all_files(ID='BAR_0007')[0]
    assert make_file_id(bar_seven, 'FOO') == 'FOO_0007'
    f = mets.add_file('ABC', ID="BAR_42", mimetype="image/tiff")
    mets.remove_file(fileGrp='FOO')
    assert make_file_id(f, 'FOO') == 'FOO_BAR_42'
    mets.add_file('FOO', ID="FOO_0001", mimetype="image/tiff")
266
267
def test_make_file_id_570():
    """Regression test for https://github.com/OCR-D/core/pull/570"""
    mets = OcrdMets.empty_mets()
    first_file = mets.add_file('GRP', ID='FOO_0001', pageId='phys0001')
    mets.add_file('GRP', ID='GRP2_0001', pageId='phys0002')
    generated_id = make_file_id(first_file, 'GRP2')
    assert generated_id == 'GRP2_phys0001'
273
274
def test_make_file_id_605():
    """
    https://github.com/OCR-D/core/pull/605
    Also: Same fileGrp!
    """
    mets = OcrdMets.empty_mets()
    # The GRP1 file only needs to exist in the METS; its handle is never used,
    # so it is no longer bound to a name that the next line would overwrite.
    mets.add_file('GRP1', ID='FOO_0001', pageId='phys0001')
    f = mets.add_file('GRP2', ID='FOO_0002', pageId='phys0002')
    # NB: same fileGrp
    assert make_file_id(f, 'GRP2') == 'FOO_0002'
    assert make_file_id(f, 'GRP3') == 'GRP3_phys0002'
285
286
def test_make_file_id_744():
    """
    https://github.com/OCR-D/core/pull/744
    > Often file IDs have two numbers, one of which will clash. In that case only the numerical fallback works.
    """
    mets = OcrdMets.empty_mets()
    # The first file only needs to exist in the METS; its handle is never used,
    # so it is no longer bound to a name that the next line would overwrite.
    mets.add_file('GRP2', ID='img1796-97_00000024_img', pageId='phys0024')
    f = mets.add_file('GRP2', ID='img1796-97_00000025_img', pageId='phys0025')
    assert make_file_id(f, 'GRP3') == 'GRP3_phys0025'
295
296
def test_generate_range():
    """
    Check range expansion, the ``ValueError`` cases and the same-number warning.

    Calls expected to raise are written without ``assert``: the exception
    propagates before any comparison could run, so the former
    ``assert ... == 0`` wrappers were dead code suggesting a bogus result.
    """
    assert generate_range('PHYS_0001', 'PHYS_0005') == ['PHYS_0001', 'PHYS_0002', 'PHYS_0003', 'PHYS_0004', 'PHYS_0005']
    with raises(ValueError, match='could not find numeric part'):
        generate_range('NONUMBER', 'ALSO_NONUMBER')
    with raises(ValueError, match='differ in their non-numeric part'):
        generate_range('PHYS_0001_123', 'PHYS_0010_123')
    with raises(ValueError, match='differ in their non-numeric part'):
        generate_range('1', 'PHYS_0005')
    with raises(ValueError, match='differ in their non-numeric part'):
        generate_range('1', 'page 5')
    # A warning does not abort the call, so here the return value is checked as well.
    with warns(UserWarning, match='same number'):
        assert generate_range('PHYS_0001_123', 'PHYS_0001_123') == ['PHYS_0001_123']
308
309
def test_safe_filename():
    """Spaces and punctuation become underscores; Greek and Japanese letters are kept."""
    cases = {
        'Hello world,!': 'Hello_world_',
        ' Καλημέρα κόσμε,': '_Καλημέρα_κόσμε_',
        ':コンニチハ:': '_コンニチハ_',
    }
    for raw_name, expected in cases.items():
        assert safe_filename(raw_name) == expected
313
314
def test_partition_list():
    """
    Check chunking of 10- and 13-element lists, including the error cases.

    Each out-of-range ``chunk_index`` gets its own ``raises`` block. In the
    original all three calls shared one block, so the first exception ended
    it and the remaining two calls were never executed.
    """
    lst_10 = list(range(1, 11))
    assert partition_list(None, 1) == []
    assert partition_list([], 1) == []
    assert partition_list(lst_10, 1) == [lst_10]
    assert partition_list(lst_10, 3) == [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10]]
    assert partition_list(lst_10, 3, 1) == [[5, 6, 7]]
    assert partition_list(lst_10, 3, 0) == [[1, 2, 3, 4]]
    # chunk_index beyond the last chunk must raise for every combination.
    for chunks, chunk_index in ((4, 5), (5, 5), (5, 6)):
        with raises(IndexError):
            partition_list(lst_10, chunks=chunks, chunk_index=chunk_index)
    # More chunks than list elements is rejected.
    with raises(ValueError):
        partition_list(lst_10, chunks=11)
    # odd prime number tests
    lst_13 = list(range(1, 14))
    assert partition_list(lst_13, chunks=2) == [[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13]]
    assert partition_list(lst_13, chunks=3) == [[1, 2, 3, 4, 5], [6, 7, 8, 9], [10, 11, 12, 13]]
    assert partition_list(lst_13, chunks=4) == [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13]]
    assert partition_list(lst_13, chunks=4, chunk_index=1) == [[5, 6, 7]]
334
335
def test_sparkline():
    """Numeric series map to block characters; a series with invalid values gives ''."""
    cases = [
        ([5, 2, 3], '█▃▄'),
        ([1000, 1, 2222], '▃ █'),
        ([8, 7, 6, 5, 4, 3, 2, 1, 0], '█▇▆▅▄▃▂▁ '),
        ([-1, None, 'forty-two'], ''),
    ]
    for values, expected in cases:
        assert sparkline(values) == expected
340
341
342
# Allow running this test module directly as a script.
if __name__ == "__main__":
    main(__file__)
344