Conditions | 35 |
Total Lines | 68 |
Code Lines | 42 |
Lines | 0 |
Ratio | 0 % |
Tests | 25 |
CRAP Score | 35 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex methods like etlt.cleaner.DateCleaner.DateCleaner.clean() often do a lot of different things, which is usually a sign that the enclosing class does too. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | 1 | import re |
|
@staticmethod
def clean(date: Optional[str], ignore_time: bool = False) -> Optional[str]:
    """
    Converts a date in miscellaneous format to ISO-8601 (YYYY-MM-DD) format.

    The following formats are tried, in this order:

    * YYYY-M-D, D-M-YYYY and D-M-YY, where the parts are separated by '-', '/', '.' or a space. A trailing time part
      is accepted only when it consists of zeros (and colons) or when ignore_time is set; it is dropped. Two-digit
      years '20'..'99' are mapped to 19YY, '00'..'19' to 20YY.
    * DD-MM-YYYY optionally followed by HH:mm:ss or HH:mm (any single non-digit as separator). Unless ignore_time is
      set, the time is appended as 'THH:mm:ss'.
    * DDmonYYYY (case-insensitive month abbreviation found in DateCleaner.month_map) optionally followed by
      HH:mm:ss.
    * YYYYMMDD.

    Empty input (None or '') and input in an unrecognized format are returned unchanged.

    :param date: The input date.
    :param ignore_time: Whether any trailing time part must be ignored.
    """
    # Return empty input immediately.
    if not date:
        return date

    parts = re.split(r'[\-/. ]', date)

    # The input is a plain date when it has exactly three parts, or when everything after the third part may be
    # discarded (ignore_time) or is an all-zero time, optionally with all-zero fractional seconds.
    is_plain_date = (len(parts) == 3 or
                     (len(parts) > 3 and ignore_time) or
                     (len(parts) == 4 and re.match(r'^[0:]*$', parts[3])) or
                     (len(parts) == 5 and re.match(r'^[0:]*$', parts[3]) and re.match(r'^0*$', parts[4])))

    # Only reorder the parts when all three are non-empty and purely numeric. Without this check arbitrary text
    # such as 'a b cdef' would be mangled into 'cdef-0b-0a' instead of being returned unchanged.
    if is_plain_date and all(re.fullmatch(r'[0-9]+', part) for part in parts[:3]):
        first, second, third = parts[0], parts[1], parts[2]

        if len(first) == 4 and len(second) <= 2 and len(third) <= 2:
            # Assume date is in YYYY-MM-DD or YYYY-M-D format.
            return first + '-' + second.zfill(2) + '-' + third.zfill(2)

        if len(first) <= 2 and len(second) <= 2 and len(third) == 4:
            # Assume date is in DD-MM-YYYY or D-M-YYYY format.
            return third + '-' + second.zfill(2) + '-' + first.zfill(2)

        if len(first) <= 2 and len(second) <= 2 and len(third) == 2:
            # Assume date is in DD-MM-YY or D-M-YY format.
            year = '19' + third if third >= '20' else '20' + third

            return year + '-' + second.zfill(2) + '-' + first.zfill(2)

    # Try DD-MM-YYYY HH:mm:ss format
    pattern = r'^(\d{2})\D(\d{2})\D(\d{4})' + ('.*$' if ignore_time else r'(\D(\d{1,2})\D(\d{1,2})\D(\d{1,2}))?$')
    match = re.match(pattern, date)
    if match:
        ret = match.group(3) + '-' + match.group(2) + '-' + match.group(1)
        # The time groups exist only in the pattern used when ignore_time is false.
        if len(match.groups()) == 7 and match.group(4):
            # Zero-pad each component: ISO-8601 requires two digits for hours, minutes and seconds.
            ret += 'T' + match.group(5).zfill(2) + ':' + match.group(6).zfill(2) + ':' + match.group(7).zfill(2)
        return ret

    # Try DD-MM-YYYY HH:mm format
    pattern = r'^(\d{2})\D(\d{2})\D(\d{4})' + ('.*$' if ignore_time else r'(\D(\d{1,2})\D(\d{1,2}))?$')
    match = re.match(pattern, date)
    if match:
        ret = match.group(3) + '-' + match.group(2) + '-' + match.group(1)
        if len(match.groups()) == 6 and match.group(4):
            # No seconds in the input: use zero seconds.
            ret += 'T' + match.group(5).zfill(2) + ':' + match.group(6).zfill(2) + ':00'
        return ret

    # Try DDmonYYYY or DDmonYYYY HH:mm:ss format
    pattern = r'^(\d{2})([a-z]{3})(\d{4})' + ('.*$' if ignore_time else r'(\D(\d{1,2})\D(\d{1,2})\D(\d{1,2}))?$')
    match = re.match(pattern, date.lower())
    if match and match.group(2) in DateCleaner.month_map:
        ret = match.group(3) + '-' + DateCleaner.month_map[match.group(2)] + '-' + match.group(1)
        if len(match.groups()) == 7 and match.group(4):
            ret += 'T' + match.group(5).zfill(2) + ':' + match.group(6).zfill(2) + ':' + match.group(7).zfill(2)
        return ret

    # Try YYYYMMDD format.
    pattern = r'^\d{8}' + ('.*$' if ignore_time else '$')
    match = re.match(pattern, date)
    if match:
        # Assume date is YYYYMMDD format
        return date[0:4] + '-' + date[4:6] + '-' + date[6:8]

    # Format not recognized. Just return the original string.
    return date
||
101 | |||
103 |