Completed
Push — master ( da0908...c8344f )
by P.R.
01:36
created

Type2CondenseHelper._add_interval()   B

Complexity

Conditions 6

Size

Total Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 6

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 22
ccs 13
cts 13
cp 1
rs 7.7857
cc 6
crap 6
1
"""
2
ETLT
3
4
Copyright 2016 Set Based IT Consultancy
5
6
Licence MIT
7
"""
8 1
import copy
9
10 1
from etlt.helper.Allen import Allen
11 1
from etlt.helper.Type2Helper import Type2Helper
12
13
14 1
class Type2CondenseHelper(Type2Helper):
    """
    A helper class for deriving the distinct intervals in reference data with date intervals.
    """

    # ------------------------------------------------------------------------------------------------------------------
    @staticmethod
    def _distinct(row1, row2):
        """
        Splits two intervals that share at least one point, but are not equal, into non-overlapping intervals.

        Returns None when the two intervals need no splitting: one lies before the other, one meets the other, or
        they are equal. Otherwise returns a list of 2 or 3 consecutive, non-overlapping intervals that together cover
        both input intervals.

        :param tuple[int,int] row1: The first interval (start, end).
        :param tuple[int,int] row2: The second interval (start, end).

        :rtype: None|list[tuple[int,int]]

        :raises ValueError: When the relation between the two intervals is not one of the 13 relations handled here.
        """
        relation = Allen.relation(row1[0], row1[1], row2[0], row2[1])

        # Relations for which nothing has to be split:
        #   before:  row1: |----|                 meets:  row1: |-------|            equal:  row1: |--------|
        #            row2:          |-----|               row2:          |-------|           row2: |--------|
        # (and the inverses of before and meets).
        if relation in (Allen.X_BEFORE_Y,
                        Allen.X_BEFORE_Y_INVERSE,
                        Allen.X_MEETS_Y,
                        Allen.X_MEETS_Y_INVERSE,
                        Allen.X_EQUAL_Y):
            return None

        if relation == Allen.X_OVERLAPS_WITH_Y:
            # row1: |-----------|
            # row2:       |----------|
            return [(row1[0], row2[0] - 1), (row2[0], row1[1]), (row1[1] + 1, row2[1])]

        if relation == Allen.X_OVERLAPS_WITH_Y_INVERSE:
            # row1:       |----------|
            # row2: |-----------|
            return [(row2[0], row1[0] - 1), (row1[0], row2[1]), (row2[1] + 1, row1[1])]

        if relation == Allen.X_STARTS_Y:
            # row1: |------|
            # row2: |----------------|
            return [(row1[0], row1[1]), (row1[1] + 1, row2[1])]

        if relation == Allen.X_STARTS_Y_INVERSE:
            # row1: |----------------|
            # row2: |------|
            return [(row2[0], row2[1]), (row2[1] + 1, row1[1])]

        if relation == Allen.X_DURING_Y:
            # row1:      |------|
            # row2: |----------------|
            return [(row2[0], row1[0] - 1), (row1[0], row1[1]), (row1[1] + 1, row2[1])]

        if relation == Allen.X_DURING_Y_INVERSE:
            # row1: |----------------|
            # row2:      |------|
            return [(row1[0], row2[0] - 1), (row2[0], row2[1]), (row2[1] + 1, row1[1])]

        if relation == Allen.X_FINISHES_Y:
            # row1:           |------|
            # row2: |----------------|
            return [(row2[0], row1[0] - 1), (row1[0], row1[1])]

        if relation == Allen.X_FINISHES_Y_INVERSE:
            # row1: |----------------|
            # row2:           |------|
            return [(row1[0], row2[0] - 1), (row2[0], row2[1])]

        # All 13 relations in Allen's interval algebra are covered above, so reaching this point means the relation
        # is not one of them. The value is formatted with !s rather than :d so that a non-integer value is reported
        # through this ValueError instead of failing inside format() with a TypeError.
        # NOTE(review): presumably Allen.relation can yield a non-integer (e.g. None) for malformed intervals; confirm.
        raise ValueError('Unexpected relation {0!s}'.format(relation))

    # ------------------------------------------------------------------------------------------------------------------
    @staticmethod
    def _add_interval(all_intervals, new_interval):
        """
        Adds a new interval to a set of distinct intervals, splitting intervals where the new interval overlaps.

        The set is modified in place: when the new interval overlaps an interval already in the set, that interval
        is removed and the non-overlapping pieces of both intervals are added recursively.

        :param set[(int,int)] all_intervals: The set of distinct intervals.
        :param (int,int) new_interval: The new interval.
        """
        for old_interval in all_intervals:
            pieces = Type2CondenseHelper._distinct(new_interval, old_interval)
            if pieces:
                # Stop iterating before the set is modified below.
                break
        else:
            # No interval in the set (possibly empty) overlaps the new interval: it can be added as is.
            all_intervals.add(new_interval)
            return

        # Replace the overlapping interval by the pieces. Each piece may in turn overlap other intervals in the set,
        # hence the recursion.
        all_intervals.remove(old_interval)
        for piece in pieces:
            Type2CondenseHelper._add_interval(all_intervals, piece)

    # ------------------------------------------------------------------------------------------------------------------
    def _derive_distinct_intervals(self, rows):
        """
        Returns the set of distinct intervals in a row set.

        :param list[dict[str,T]] rows: The rows set.

        :rtype: set[(int,int)]
        """
        ret = set()
        for row in rows:
            self._add_interval(ret, (row[self._key_start_date], row[self._key_end_date]))

        return ret

    # ------------------------------------------------------------------------------------------------------------------
    def condense(self):
        """
        Condense the data set to the distinct intervals based on the natural key.

        For each natural key the rows are replaced by one row per distinct interval, in sorted interval order. Each
        new row is a shallow copy of the first row of that natural key with the start and end date replaced.
        """
        for natural_key, rows in self.rows.items():
            condensed = []
            for start_date, end_date in sorted(self._derive_distinct_intervals(rows)):
                new_row = copy.copy(rows[0])
                new_row[self._key_start_date] = start_date
                new_row[self._key_end_date] = end_date
                condensed.append(new_row)

            # Only the value of an existing key is replaced, so the dict does not change size while iterating.
            self.rows[natural_key] = condensed
154
155
# ----------------------------------------------------------------------------------------------------------------------
156