Type2JoinHelper._intersect()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 17
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 7
nop 4
dl 0
loc 17
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1 1
from typing import Any, Dict, List, Optional, Tuple
2
3
from etlt.helper.Type2Helper import Type2Helper
4 1
5
6
class Type2JoinHelper(Type2Helper):
    """
    A helper class for joining data sets with date intervals.
    """

    # ------------------------------------------------------------------------------------------------------------------
    @staticmethod
    def _intersect(start1: int, end1: int, start2: int, end2: int) -> Tuple[Optional[int], Optional[int]]:
        """
        Returns the intersection of two intervals. Returns (None, None) if the intersection is empty.

        Both end points are inclusive: two intervals that share exactly one point intersect in that single point.

        :param start1: The start date of the first interval.
        :param end1: The end date of the first interval.
        :param start2: The start date of the second interval.
        :param end2: The end date of the second interval.

        :return: The tuple (start, end) of the intersection, or (None, None) when the intervals do not overlap.
        """
        start = max(start1, start2)
        end = min(end1, end2)

        if start > end:
            return None, None

        return start, end

    # ------------------------------------------------------------------------------------------------------------------
    def _additional_rows_date2int(self, keys: List[Tuple[str, str]], rows: List[Dict[str, Any]]) -> None:
        """
        Replaces start and end dates of the additional date intervals in the row set with their integer representation.

        The rows are modified in place. The values under this helper's own start and end date keys are left untouched.

        :param keys: The other keys with start and end date.
        :param rows: The list of rows.
        """
        # The helper's own interval keys are skipped below; build the pair once instead of once per comparison.
        own_keys = (self._key_start_date, self._key_end_date)

        for row in rows:
            for key_start_date, key_end_date in keys:
                if key_start_date not in own_keys:
                    row[key_start_date] = self._date2int(row[key_start_date])
                if key_end_date not in own_keys:
                    row[key_end_date] = self._date2int(row[key_end_date])

    # ------------------------------------------------------------------------------------------------------------------
    def _intersection(self, keys: List[Tuple[str, str]], rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Computes the intersection of the date intervals of two or more reference data sets. If the intersection is empty
        the row is removed from the group.

        The rows are modified in place: the additional start and end date keys are deleted and the helper's own start
        and end date keys receive the intersection.

        :param keys: The other keys with start and end date.
        :param rows: The list of rows.

        :return: The rows with a non-empty intersection. When keys is empty the given list itself is returned.
        """
        # If there are no other keys with start and end date (i.e. nothing to merge) return immediately.
        if not keys:
            return rows

        own_keys = (self._key_start_date, self._key_end_date)

        ret = []
        for row in rows:
            start_date = row[self._key_start_date]
            end_date = row[self._key_end_date]
            for key_start_date, key_end_date in keys:
                start_date, end_date = Type2JoinHelper._intersect(start_date,
                                                                  end_date,
                                                                  row[key_start_date],
                                                                  row[key_end_date])
                # An empty intersection is signalled by None. Test for None explicitly: the integer 0 is a valid start
                # date and a truthiness test would mistake it for an empty intersection.
                if start_date is None:
                    break

                # The additional interval has been folded into the running intersection; drop its keys from the row.
                if key_start_date not in own_keys:
                    del row[key_start_date]
                if key_end_date not in own_keys:
                    del row[key_end_date]

            if start_date is not None:
                row[self._key_start_date] = start_date
                row[self._key_end_date] = end_date
                ret.append(row)

        return ret

    # ------------------------------------------------------------------------------------------------------------------
    def merge(self, keys: List[Tuple[str, str]]) -> None:
        """
        Merges the join on pseudo keys of two or more reference data sets.

        Groups of rows for which no row has a non-empty intersection are removed from this helper's row sets.

        :param keys: For each data set the keys of the start and end date.
        """
        # Pseudo keys are collected here and removed after the loop, so the dictionary does not change size while it is
        # being iterated.
        deletes = []
        for pseudo_key, rows in self._rows.items():
            self._additional_rows_date2int(keys, rows)
            rows = self._intersection(keys, rows)
            if rows:
                rows = self._rows_sort(rows)
                self._rows[pseudo_key] = self._merge_adjacent_rows(rows)
            else:
                deletes.append(pseudo_key)

        for pseudo_key in deletes:
            del self._rows[pseudo_key]
100 1
101
# ----------------------------------------------------------------------------------------------------------------------
102