Passed
Branch master (17b603)
by P.R.
01:31
created

Type2CondenseHelper.condense()   A

Complexity

Conditions 3

Size

Total Lines 14
Code Lines 10

Duplication

Lines 14
Ratio 100 %

Code Coverage

Tests 10
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 10
nop 1
dl 14
loc 14
ccs 10
cts 10
cp 1
crap 3
rs 9.9
c 0
b 0
f 0
1
"""
2
ETLT
3
4
Copyright 2016 Set Based IT Consultancy
5
6
Licence MIT
7
"""
8
9 1
from etlt.helper.Allen import Allen
10 1
from etlt.helper.Type2Helper import Type2Helper
11
12
13 1 View Code Duplication
class Type2CondenseHelper(Type2Helper):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
14
    """
15
    A helper class for deriving the distinct intervals in reference data with date intervals.
16
17
    A typical use case for this class is to aggregate the reference data for a type 2 dimension into the reference data
18
    for another type 2 dimension at a higher level in the dimension hierarchy.
19
    """
20
21
    # ------------------------------------------------------------------------------------------------------------------
22 1
    @staticmethod
    def _distinct(row1, row2):
        """
        Splits two overlapping intervals into consecutive, non-overlapping intervals.

        The relation between the two intervals is obtained from Allen.relation(). Depending on that relation this
        method returns:
        * an empty list when Allen.relation() returns None (one of the two intervals is invalid);
        * None when there is nothing to split: one interval lies before the other, the intervals meet, or they are
          equal;
        * otherwise a list of 2 or 3 intervals, ordered by start, obtained by cutting the two intervals at each
          other's boundaries.

        :param tuple[int,int] row1: The first interval.
        :param tuple[int,int] row2: The second interval.

        :rtype: None|list[tuple[int,int]]

        :raises ValueError: When Allen.relation() returns a relation that is not handled here.
        """
        x_start, x_end = row1[0], row1[1]
        y_start, y_end = row2[0], row2[1]

        relation = Allen.relation(x_start, x_end, y_start, y_end)

        if relation is None:
            # One of the 2 intervals is invalid.
            return []

        # Intervals that do not share any point, or that are identical, need no splitting.
        if relation in (Allen.X_BEFORE_Y,
                        Allen.X_BEFORE_Y_INVERSE,
                        Allen.X_MEETS_Y,
                        Allen.X_MEETS_Y_INVERSE,
                        Allen.X_EQUAL_Y):
            return None

        if relation == Allen.X_OVERLAPS_WITH_Y:
            # row1: |-----------|
            # row2:       |----------|
            pieces = [(x_start, y_start - 1), (y_start, x_end), (x_end + 1, y_end)]

        elif relation == Allen.X_OVERLAPS_WITH_Y_INVERSE:
            # row1:       |----------|
            # row2: |-----------|
            pieces = [(y_start, x_start - 1), (x_start, y_end), (y_end + 1, x_end)]

        elif relation == Allen.X_STARTS_Y:
            # row1: |------|
            # row2: |----------------|
            pieces = [(x_start, x_end), (x_end + 1, y_end)]

        elif relation == Allen.X_STARTS_Y_INVERSE:
            # row1: |----------------|
            # row2: |------|
            pieces = [(y_start, y_end), (y_end + 1, x_end)]

        elif relation == Allen.X_DURING_Y:
            # row1:      |------|
            # row2: |----------------|
            pieces = [(y_start, x_start - 1), (x_start, x_end), (x_end + 1, y_end)]

        elif relation == Allen.X_DURING_Y_INVERSE:
            # row1: |----------------|
            # row2:      |------|
            pieces = [(x_start, y_start - 1), (y_start, y_end), (y_end + 1, x_end)]

        elif relation == Allen.X_FINISHES_Y:
            # row1:           |------|
            # row2: |----------------|
            pieces = [(y_start, x_start - 1), (x_start, x_end)]

        elif relation == Allen.X_FINISHES_Y_INVERSE:
            # row1: |----------------|
            # row2:           |------|
            pieces = [(x_start, y_start - 1), (y_start, y_end)]

        else:
            # All 13 relations of Allen's interval algebra are handled above.
            raise ValueError('Unexpected relation {0}'.format(relation))

        return pieces
106
107
    # ------------------------------------------------------------------------------------------------------------------
108 1
    @staticmethod
    def _add_interval(all_intervals, new_interval):
        """
        Adds a new interval to a set of non-overlapping intervals.

        The new interval is compared with the intervals already in the set. At the first existing interval for which
        _distinct() returns a non-empty list, that existing interval is removed from the set and each of the returned
        intervals is added recursively (such an interval may in turn overlap with other intervals in the set).

        When no such existing interval is found the new interval is added to the set as is. This includes the cases
        where _distinct() returns None (nothing to split) or an empty list (an invalid interval): neither result
        ever causes an existing interval to be removed.

        The set is modified in place.

        :param set[(int,int)] all_intervals: The set of distinct intervals.
        :param (int,int) new_interval: The new interval.
        """
        for old_interval in all_intervals:
            pieces = Type2CondenseHelper._distinct(new_interval, old_interval)
            if pieces:
                # Leave the loop before modifying the set: a set must not change size while it is being iterated.
                break
        else:
            # Nothing in the set has to be split. When the new interval equals an existing one adding is a no-op.
            all_intervals.add(new_interval)
            return

        # Replace the overlapping interval by the distinct intervals derived from it and the new interval.
        all_intervals.remove(old_interval)
        for distinct_interval in pieces:
            Type2CondenseHelper._add_interval(all_intervals, distinct_interval)
130
131
    # ------------------------------------------------------------------------------------------------------------------
132 1
    def _derive_distinct_intervals(self, rows):
        """
        Collects the intervals of all rows in a row set and reduces them to a set of distinct intervals.

        The interval of a row is the pair of values stored under the start date key and the end date key of this
        helper. Each interval is merged into the result with _add_interval().

        :param list[dict[str,T]] rows: The rows set.

        :rtype: set[(int,int)]
        """
        distinct_intervals = set()
        for row in rows:
            interval = (row[self._key_start_date], row[self._key_end_date])
            self._add_interval(distinct_intervals, interval)

        return distinct_intervals
145
146
    # ------------------------------------------------------------------------------------------------------------------
147 1
    def condense(self):
        """
        Condenses the data set: for each pseudo key the rows are replaced by one row per distinct interval.

        The new rows are ordered by interval and hold only the columns of the pseudo key plus the start date and the
        end date of the interval.
        """
        for pseudo_key, rows in self._rows.items():
            condensed = []
            for start_date, end_date in sorted(self._derive_distinct_intervals(rows)):
                row = dict(zip(self._pseudo_key, pseudo_key))
                row[self._key_start_date] = start_date
                row[self._key_end_date] = end_date
                condensed.append(row)

            # Only the value of an existing key is replaced, the dict does not change size during the iteration.
            self._rows[pseudo_key] = condensed
161
162
# ----------------------------------------------------------------------------------------------------------------------
163