|
1
|
|
|
from dataclasses import dataclass |
|
2
|
|
|
from typing import Union |
|
3
|
|
|
|
|
4
|
|
|
from airflow.models.dag import DAG |
|
5
|
|
|
|
|
6
|
|
|
from egon.data.datasets import Dataset, TaskGraph, Tasks |
|
7
|
|
|
|
|
8
|
|
|
|
|
9
|
|
|
def test_uniqueness_of_automatically_generated_final_dataset_task():
    """Check that every dataset gets a distinctly named final task.

    Regression test for issue #985. When several `Dataset`s which end in
    parallel tasks are defined in a module below the `egon.data.datasets`
    package, two pieces of code interfere with each other: the one
    stripping the module name prefix from task ids, and the one generating
    the final dataset task, i.e. the task updating the dataset version
    after all parallel tasks have finished. The result were non-distinct
    task ids, so tasks generated later clobbered the ones generated
    earlier, which in turn caused spurious cycles and other
    inconsistencies and bugs in the graph.
    """

    # Four do-nothing callables. Each one gets its own `__name__` so that
    # they can be told apart by name.
    placeholders = [(lambda: None) for _ in range(4)]
    for index, placeholder in enumerate(placeholders):
        placeholder.__name__ = f"noop-{index}"

    # Both datasets end in a set of two tasks, i.e. in parallel tasks,
    # which is the situation described in the issue.
    @dataclass
    class Dataset_1(Dataset):
        name: str = "DS1"
        version: str = "0.0.0"
        tasks: Union[Tasks, TaskGraph] = (set(placeholders[:2]),)

    @dataclass
    class Dataset_2(Dataset):
        name: str = "DS2"
        version: str = "0.0.0"
        tasks: Union[Tasks, TaskGraph] = (set(placeholders[2:]),)

    # Pretend that both classes were defined in a module below the
    # `egon.data.datasets` package.
    for dataset_class in (Dataset_1, Dataset_2):
        dataset_class.__module__ = "egon.data.datasets.test.datasets"

    with DAG(dag_id="Test-DAG", default_args={"start_date": "1111-11-11"}):
        first, second = Dataset_1(), Dataset_2()
        # NOTE(review): the last entry of `tasks` is taken to identify the
        # automatically generated final task -- confirm against `Dataset`.
        first_final = list(first.tasks)[-1]
        second_final = list(second.tasks)[-1]
        assert (
            first_final != second_final
        ), "Expected unique names for final tasks of distinct datasets."
|
46
|
|
|
|