Passed
Push — master ( 7bf35e...b0d55d )
by P.R.
02:48
created

Type2JoinHelper   A

Complexity

Total Complexity 19

Size/Duplication

Total Lines 100
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 4
Bugs 0 Features 0
Metric Value
c 4
b 0
f 0
dl 0
loc 100
ccs 45
cts 45
cp 1
rs 10
wmc 19

4 Methods

Rating   Name   Duplication   Size   Complexity  
D _intersection() 0 36 8
A merge() 0 18 4
A _intersect() 0 19 2
B _additional_rows_date2int() 0 15 5
1
"""
2
ETLT
3
4
Copyright 2016 Set Based IT Consultancy
5
6
Licence MIT
7
"""
8 1
from etlt.helper.Type2Helper import Type2Helper
9
10
11 1
class Type2JoinHelper(Type2Helper):
    """
    A helper class for joining data sets with date intervals.

    Row storage (``self._rows``), the names of the primary interval keys (``self._key_start_date`` and
    ``self._key_end_date``) and the helpers ``_date2int``, ``_rows_sort`` and ``_merge_adjacent_rows`` are not defined
    in this class; they are presumably provided by Type2Helper.
    """

    # ------------------------------------------------------------------------------------------------------------------
    @staticmethod
    def _intersect(start1, end1, start2, end2):
        """
        Returns the intersection of two closed intervals. Returns (None, None) if the intersection is empty.

        :param int start1: The start date of the first interval.
        :param int end1: The end date of the first interval.
        :param int start2: The start date of the second interval.
        :param int end2: The end date of the second interval.

        :rtype: tuple[int|None,int|None]
        """
        start = max(start1, start2)
        end = min(end1, end2)

        # The latest start lying after the earliest end means the intervals do not overlap.
        if start > end:
            return None, None

        return start, end

    # ------------------------------------------------------------------------------------------------------------------
    def _additional_rows_date2int(self, keys, rows):
        """
        Replaces, in place, the start and end dates of the additional date intervals in the row set with the value
        returned by self._date2int. The primary start and end date keys of this helper are left untouched.

        :param list[tuple[str,str]] keys: The other keys with start and end date.
        :param list[dict[str,T]] rows: The list of rows. The rows are modified in place.

        :rtype: None
        """
        # Columns of the primary interval; these are skipped here.
        primary_keys = (self._key_start_date, self._key_end_date)

        for row in rows:
            for key_start_date, key_end_date in keys:
                if key_start_date not in primary_keys:
                    row[key_start_date] = self._date2int(row[key_start_date])
                if key_end_date not in primary_keys:
                    row[key_end_date] = self._date2int(row[key_end_date])

    # ------------------------------------------------------------------------------------------------------------------
    def _intersection(self, keys, rows):
        """
        Computes the intersection of the date intervals of two or more reference data sets. If the intersection is empty
        the row is removed from the group.

        Rows that are kept are modified in place: the primary start and end date are replaced by the intersection and
        the additional start and end date columns are deleted.

        :param list[tuple[str,str]] keys: The other keys with start and end date.
        :param list[dict[str,T]] rows: The list of rows.

        :rtype: list[dict[str,T]]
        """
        # If there are no other keys with start and end date (i.e. nothing to merge) return immediately.
        if not keys:
            return rows

        primary_keys = (self._key_start_date, self._key_end_date)

        ret = []
        for row in rows:
            start_date = row[self._key_start_date]
            end_date = row[self._key_end_date]
            for key_start_date, key_end_date in keys:
                start_date, end_date = Type2JoinHelper._intersect(start_date,
                                                                  end_date,
                                                                  row[key_start_date],
                                                                  row[key_end_date])
                # An empty intersection is signalled by None. Compare against None explicitly: a start date with
                # integer value 0 is falsy but is still a valid (non-empty) intersection.
                if start_date is None:
                    break
                if key_start_date not in primary_keys:
                    del row[key_start_date]
                if key_end_date not in primary_keys:
                    del row[key_end_date]

            if start_date is not None:
                row[self._key_start_date] = start_date
                row[self._key_end_date] = end_date
                ret.append(row)

        return ret

    # ------------------------------------------------------------------------------------------------------------------
    def merge(self, keys):
        """
        Merges the join on pseudo keys of two or more reference data sets.

        For each pseudo key the additional dates are converted, the intervals are intersected, and the remaining rows
        are sorted and passed through self._merge_adjacent_rows. Pseudo keys for which no row remains are removed.

        :param list[tuple[str,str]] keys: For each data set the keys of the start and end date.
        """
        # Pseudo keys are removed after the loop: deleting entries while iterating over the dictionary is not allowed.
        deletes = []
        for pseudo_key, rows in self._rows.items():
            self._additional_rows_date2int(keys, rows)
            rows = self._intersection(keys, rows)
            if rows:
                rows = self._rows_sort(rows)
                self._rows[pseudo_key] = self._merge_adjacent_rows(rows)
            else:
                deletes.append(pseudo_key)

        for pseudo_key in deletes:
            del self._rows[pseudo_key]
111
112
# ----------------------------------------------------------------------------------------------------------------------
113