Passed
Push — dev ( d02171...8ae202 )
by
unknown
02:10 queued 12s
created

test_dataset_class   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 46
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 4
eloc 28
dl 0
loc 46
rs 10
c 0
b 0
f 0

1 Function

Rating   Name   Duplication   Size   Complexity  
A test_uniqueness_of_automatically_generated_final_dataset_task() 0 37 4
1
from dataclasses import dataclass
2
from typing import Union
3
4
from airflow.models.dag import DAG
5
6
from egon.data.datasets import Dataset, TaskGraph, Tasks
7
8
9
def test_uniqueness_of_automatically_generated_final_dataset_task():
    """Check that auto-generated final dataset tasks get distinct ids.

    Regression test for issue #985. When several `Dataset`s that end in
    parallel tasks live in a module below the `egon.data.datasets` package,
    two pieces of code used to interact badly: the one stripping the module
    name prefix from task ids, and the one generating the final dataset
    task which updates the dataset version after all parallel tasks have
    finished. The resulting task ids were not distinct, so tasks generated
    later clobbered those generated earlier, which showed up as spurious
    cycles and other inconsistencies and bugs in the graph.
    """
    # Four separate do-nothing callables, told apart only by their names.
    placeholders = [(lambda: None) for _ in range(4)]
    for index, placeholder in enumerate(placeholders):
        placeholder.__name__ = f"noop-{index}"
    first, second, third, fourth = placeholders

    @dataclass
    class Dataset_1(Dataset):
        name: str = "DS1"
        version: str = "0.0.0"
        # The graph's only element is a set of two callables; presumably
        # this is the "ending in parallel tasks" case from the docstring.
        tasks: Union[Tasks, TaskGraph] = ({first, second},)

    @dataclass
    class Dataset_2(Dataset):
        name: str = "DS2"
        version: str = "0.0.0"
        tasks: Union[Tasks, TaskGraph] = ({third, fourth},)

    # Make both classes look as if they were defined in a module below
    # the `egon.data.datasets` package, as described in the docstring.
    for dataset_class in (Dataset_1, Dataset_2):
        dataset_class.__module__ = "egon.data.datasets.test.datasets"

    # Instantiate the datasets while the DAG context is active.
    with DAG(dag_id="Test-DAG", default_args={"start_date": "1111-11-11"}):
        datasets = [dataset_class() for dataset_class in (Dataset_1, Dataset_2)]

    # The last entry yielded by iterating each dataset's `tasks` is what
    # the test treats as that dataset's final task.
    final_of_first, final_of_second = (
        list(dataset.tasks)[-1] for dataset in datasets
    )
    message = "Expected unique names for final tasks of distinct datasets."
    assert final_of_first != final_of_second, message
46