Conditions | 17 |
Total Lines | 109 |
Code Lines | 46 |
Lines | 109 |
Ratio | 100 % |
Tests | 42 |
CRAP Score | 17.0036 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex classes like etlt.helper.Type2Helper.Type2Helper._merge_adjacent_rows() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | 1 | import copy |
|
def _merge_adjacent_rows(self, rows):
    """
    Resolves adjacent and overlapping rows of a single group into a list of consecutive rows.

    The rows are walked in order while one "pending" row is carried along. Every next row is compared with the
    pending row using Allen's interval algebra and handled as follows:

    * Rows that are identical (excluding start and end date) and adjacent or overlapping are replaced with a single
      row.
    * When different rows overlap, the interval with the most recent start date prevails for the overlapping period.
    * If the start dates are the same, the interval with the most recent end date prevails.
    * If the start and end dates are equal, the last row in the data set prevails.

    Leading rows whose start date lies after their end date are skipped. When Allen.relation() yields None for a
    later row, an empty list is returned.

    Note: the end date of rows in the given list is adjusted in place.

    :param list[dict[str,T]] rows: The rows in a group (i.e. with the same natural key). The rows must be sorted
                                   such that the start date of a row is never before the start date of its
                                   predecessor.

    :rtype: list[dict[str,T]]

    :raises ValueError: When the relation between two consecutive rows shows that the rows are not sorted properly.
    """
    merged = []
    pending = None

    for row in rows:
        if not pending:
            # No valid row has been seen yet: only a row with start date <= end date can become the pending row.
            if row[self._key_start_date] <= row[self._key_end_date]:
                pending = row
            continue

        relation = Allen.relation(pending[self._key_start_date],
                                  pending[self._key_end_date],
                                  row[self._key_start_date],
                                  row[self._key_end_date])

        if relation is None:
            # row holds an invalid interval (pending always holds a valid interval). Hence, the join is empty.
            return []

        if relation == Allen.X_BEFORE_Y:
            # Two rows with distinct intervals.
            # pending: |----|
            # row:                 |-----|
            merged.append(pending)
            pending = row
            continue

        if relation in (Allen.X_STARTS_Y, Allen.X_EQUAL_Y):
            # Both rows start on the same date and row ends on or after pending. Should not occur with proper
            # reference data (equal intervals can happen when the reference data sets are joined without respect
            # for date intervals). row replaces pending completely.
            # pending: |------|             or   |----------------|
            # row:     |----------------|        |----------------|
            pending = row
            continue

        if relation == Allen.X_MEETS_Y:
            # The two rows are adjacent.
            # pending: |-------|
            # row:              |-------|
            if self._equal(pending, row):
                # Identical (except for start and end date) and adjacent: stretch pending over row.
                pending[self._key_end_date] = row[self._key_end_date]
            else:
                # Adjacent but not identical.
                merged.append(pending)
                pending = row
            continue

        if relation in (Allen.X_OVERLAPS_WITH_Y, Allen.X_DURING_Y_INVERSE):
            # pending overlaps row, or row lies during pending. Should not occur with proper reference data.
            # pending: |-----------|             or   |----------------|
            # row:            |----------|                 |------|
            # Note: the interval with the most recent start date prevails. Hence, when row lies during pending
            # the part of pending after the end date of row is discarded.
            if self._equal(pending, row):
                # Identical (except for start and end date): pending takes over the end date of row.
                pending[self._key_end_date] = row[self._key_end_date]
            else:
                # Not identical: pending ends just before row starts.
                pending[self._key_end_date] = row[self._key_start_date] - 1
                merged.append(pending)
                pending = row
            continue

        if relation == Allen.X_FINISHES_Y_INVERSE:
            # row finishes pending. Should not occur with proper reference data.
            # pending: |----------------|
            # row:               |------|
            if not self._equal(pending, row):
                pending[self._key_end_date] = row[self._key_start_date] - 1
                merged.append(pending)
                pending = row
            # Note: if the two rows are identical (except for start and end date) there is nothing to do.
            continue

        # Note: The rows are sorted such that pending[self._key_start_date] <= row[self._key_start_date]. Hence,
        # the following relations should not occur: X_DURING_Y, X_FINISHES_Y, X_BEFORE_Y_INVERSE,
        # X_MEETS_Y_INVERSE, X_OVERLAPS_WITH_Y_INVERSE, and X_STARTS_Y_INVERSE. Hence, we covered all 13
        # relations in Allen's interval algebra.
        raise ValueError('Data is not sorted properly. Relation: {0}'.format(relation))

    if pending:
        merged.append(pending)

    return merged
|
300 | |||
352 |