|
1
|
|
|
import datetime as dt |
|
|
|
|
|
|
2
|
|
|
import re |
|
3
|
|
|
from types import MappingProxyType |
|
4
|
|
|
|
|
5
|
|
|
import pytz |
|
|
|
|
|
|
6
|
|
|
|
|
7
|
|
|
from foil.compose import cartesian_product |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
class IsoDatePattern:
    """Builds ISO 8601 date and time regular expression patterns.

    The format follows:
        YYYY[dsep]MM[dsep]DD[dtsep]HH[tsep]MM[tsep]SS[.ffffff][tzsep]TZ

    Example str to match:
        '2014-02-14T11:12:04.343 EST'

    Every separator argument is spliced into the pattern verbatim, so it is
    interpreted as a regular expression fragment rather than literal text.

    Instance attributes (all pattern strings, not compiled):
        date: ``YYYY[dsep]MM[dsep]DD`` wrapped by ``_format_re``.
        time: ``HH[tsep]MM[tsep]SS`` plus the fraction, wrapped by
            ``_format_re``.
        datetime: ``date`` and ``time`` joined by ``dtsep``.
        datetimezone: ``datetime`` and ``TIMEZONE`` joined by ``tzsep``.
    """

    YEAR = r'(?P<year>\d{4})'
    MONTH = r'(?P<month>[0-1]\d)'
    DAY = r'(?P<day>[0-3]\d)'
    HOUR = r'(?P<hour>2[0-3]|[0-1]\d|\d)'
    MINUTE = r'(?P<minute>[0-5]\d)'
    SECOND = r'(?P<second>[0-5]\d)'
    # The decimal sign is limited to '.' or ','. A bare '.' here would match
    # ANY character, which let it swallow the first letter of a timezone
    # written directly after the seconds ('...04EST') and drop the timezone.
    MICROSECOND = r'([.,]?(?P<microsecond>\d{3,6})?)'
    TIMEZONE = r'(?P<timezone>[A-Z][A-Z_]+(?:/[A-Z][A-Z_]+)+|[A-Z]{3,})?'

    def __init__(self, dsep=r'-', tsep=r':', dtsep=r'[T|\s]?', tzsep=r'\s?'):
        """Assemble the pattern strings from the given separators.

        Args:
            dsep: regex fragment between year, month and day.
            tsep: regex fragment between hour, minute and second.
            dtsep: regex fragment between the date and the time part.
                NOTE: inside a character class ``|`` is a literal, so the
                default also accepts a ``|`` between date and time; it is
                kept unchanged for backward compatibility.
            tzsep: regex fragment between the time and the timezone.
        """
        self.date = _format_re(dsep.join([self.YEAR, self.MONTH, self.DAY]))
        self.time = _format_re(
            tsep.join([self.HOUR, self.MINUTE, self.SECOND])
            + self.MICROSECOND)
        self.datetime = dtsep.join([self.date, self.time])
        self.datetimezone = tzsep.join([self.datetime, self.TIMEZONE])
|
34
|
|
|
|
|
35
|
|
|
|
|
36
|
|
|
def _format_re(pattern): |
|
37
|
|
|
return r'(?:{})?'.format(pattern) |
|
38
|
|
|
|
|
39
|
|
|
|
|
40
|
|
|
# Date-only pattern, anchored at both ends, built with the default separators.
_RE_DATE = re.compile('^' + IsoDatePattern().date + '$')

# Full date / time / timezone pattern; deliberately left unanchored.
_RE_DATETIMEZONE = re.compile(IsoDatePattern().datetimezone)

# Read-only lookup used by DateTimeParser to turn a matched timezone string
# into a pytz timezone object.
# NOTE(review): presumably cartesian_product pairs each abbreviation with the
# US/Eastern timezone -- confirm against foil.compose.cartesian_product.
TIMEZONE_MAP = MappingProxyType(
    dict(
        cartesian_product(
            (('EST', 'EDT', 'EST/EDT'), pytz.timezone('US/Eastern'))
        )
    )
)
|
45
|
|
|
|
|
46
|
|
|
|
|
47
|
|
|
def parse_date(date_str: str, pattern=_RE_DATE) -> dt.date:
    """Parse a ``datetime.date`` from a YYYY-MM-DD formatted string.

    Args:
        date_str: text to parse.
        pattern: regular expression (compiled or str) exposing the named
            groups ``year``, ``month`` and ``day``.

    Returns:
        The date built from the matched year, month and day.

    Raises:
        AttributeError: if ``pattern`` does not match ``date_str`` at all
            (``re.match`` returns None).
        TypeError: if the pattern matches but a date group is empty.
    """
    matched = re.match(pattern, date_str)
    components = matched.groupdict()
    year, month, day = _date_to_tuple(components)
    return dt.date(year, month, day)
|
53
|
|
|
|
|
54
|
|
|
|
|
55
|
|
|
class DateTimeParser:
    """Parse date/time strings, converting to UTC when a timezone is given.

    Attributes:
        pattern: compiled regular expression whose named groups supply the
            date, time and (optionally) ``timezone`` components.
        tz_mapper: mapping from a matched timezone string to an object with
            a pytz-style ``localize`` method.
    """

    def __init__(self, pattern=_RE_DATETIMEZONE, tz_mapper=TIMEZONE_MAP):
        """Store the pattern and the timezone lookup used by ``parse``."""
        self.tz_mapper = tz_mapper
        self.pattern = pattern

    def parse(self, date_str) -> dt.datetime:
        """Return the datetime described by ``date_str``.

        The result is naive when no timezone was matched; otherwise it is
        the timezone-aware UTC equivalent of the matched local time.
        """
        fields = self.pattern.match(date_str).groupdict()

        fraction = fields['microsecond']
        if fraction is not None:
            # The digits are a decimal fraction of a second: right-pad with
            # zeros to six places so '343' becomes 343000 microseconds.
            fields['microsecond'] = fraction.ljust(6, '0')[:6]

        parsed = dt.datetime(*_datetime_to_tuple(fields))

        zone_name = fields.get('timezone')
        if zone_name is None:
            return parsed
        return self.convert_2_utc(parsed, zone_name)

    def convert_2_utc(self, datetime_, timezone):
        """Localize ``datetime_`` in ``timezone`` and convert it to UTC.

        ``timezone`` is looked up in ``tz_mapper``; a name missing from the
        mapping raises the mapping's lookup error.
        """
        zone = self.tz_mapper[timezone]
        localized = zone.localize(datetime_)
        return localized.astimezone(pytz.UTC)
|
79
|
|
|
|
|
80
|
|
|
|
|
81
|
|
|
def _datetime_to_tuple(dt_dict):
    """Convert a dict of datetime components into a positional tuple.

    The result is ordered (year, month, day, hour, minute, second,
    microsecond), ready to be unpacked into ``datetime.datetime``.

    Example
    -------
    dt_dict = {'year': '2014', 'month': '07', 'day': '23',
               'hour': '13', 'minute': '12', 'second': '45',
               'microsecond': '321'}

    _datetime_to_tuple(dt_dict) -> (2014, 7, 23, 13, 12, 45, 321)
    """
    date_part = _date_to_tuple(dt_dict)
    time_part = _time_to_tuple(dt_dict)
    return (*date_part, *time_part)
|
96
|
|
|
|
|
97
|
|
|
|
|
98
|
|
|
def _date_to_tuple(dt_dict): |
|
|
|
|
|
|
99
|
|
|
|
|
100
|
|
|
ymd = ['year', 'month', 'day'] |
|
101
|
|
|
|
|
102
|
|
|
try: |
|
103
|
|
|
year, month, day = [int(dt_dict[d]) for d in ymd] |
|
104
|
|
|
except TypeError: |
|
105
|
|
|
raise TypeError('date components must cast to ints.') |
|
106
|
|
|
|
|
107
|
|
|
return year, month, day |
|
108
|
|
|
|
|
109
|
|
|
|
|
110
|
|
|
def _time_to_tuple(dt_dict): |
|
|
|
|
|
|
111
|
|
|
|
|
112
|
|
|
times = ['hour', 'minute', 'second', 'microsecond'] |
|
113
|
|
|
|
|
114
|
|
|
hour, minute, second, microsecond = ( |
|
115
|
|
|
int(dt_dict[t]) if dt_dict[t] is not None else 0 for t in times) |
|
116
|
|
|
|
|
117
|
|
|
return hour, minute, second, microsecond |
|
118
|
|
|
|
The coding style of this project requires that you add a docstring to this code element. Below is an example for methods:
If you would like to know more about docstrings, we recommend reading PEP 257: Docstring Conventions.