Completed
Pull Request — master (#947)
by Joe
01:25
created

zipline.pipeline.loaders.normalize_data_query_time()   B

Complexity

Conditions 1

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 25
rs 8.8571
1
import datetime
2
3
import pandas as pd
4
5
6
def normalize_data_query_time(dt, time, tz):
    """Apply the correct time and timezone to a date.

    Parameters
    ----------
    dt : pd.Timestamp
        The original datetime that represents the date.
    time : datetime.time
        The time to query before.
    tz : tzinfo
        The timezone the time applies to.

    Returns
    -------
    query_dt : pd.Timestamp
        The timestamp with the correct time and date in utc.
    """
    # ``dt`` is only a label for the calendar date, so its date component is
    # used as-is (it is not shifted into ``tz`` first).
    local_query_dt = datetime.datetime.combine(dt.date(), time)

    # interpret the naive date + time as wall-clock time in ``tz`` and then
    # express that instant in utc
    return pd.Timestamp(local_query_dt, tz=tz).tz_convert('utc')
31
32
33
def normalize_timestamp_to_query_time(df,
                                      time,
                                      tz,
                                      inplace=False,
                                      ts_field='timestamp'):
    """Update the timestamp field of a dataframe to normalize dates around
    some data query time/timezone.

    Every timestamp is replaced by midnight utc of a calendar date taken in
    ``tz``: timestamps whose local time of day is at or before ``time`` map
    to their own local date, timestamps strictly after ``time`` map to the
    following local date.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to update. This needs a column named ``ts_field``.
        The values are interpreted as utc.
    time : datetime.time
        The time to query before.
    tz : tzinfo
        The timezone the time applies to.
    inplace : bool, optional
        Update the dataframe in place.
    ts_field : str, optional
        The name of the timestamp field in ``df``.

    Returns
    -------
    df : pd.DataFrame
        The dataframe with the timestamp field normalized. If ``inplace`` is
        true, then this will be the same object as ``df`` otherwise this will
        be a copy.
    """
    if not inplace:
        # don't mutate the dataframe in place
        df = df.copy()

    # view the stored (utc) timestamps as wall-clock times in the query tz
    utc_index = pd.DatetimeIndex(df.loc[:, ts_field], tz='utc')
    local_index = utc_index.tz_convert(tz)
    to_roll_forward = local_index.time > time

    # Drop the timezone *before* doing any date arithmetic. Adding one day to
    # a tz-aware index adds 24 absolute hours, which skips a calendar day
    # across a DST spring-forward; naive wall-clock dates do not have that
    # problem. Using the local date for every row (not the utc date) also
    # keeps rows that are not rolled forward on the correct day when the utc
    # and local calendar dates differ.
    local_dates = local_index.tz_localize(None).normalize()

    # add one calendar day to the rows after the query time and zero days to
    # everything else
    days_to_add = pd.to_timedelta(to_roll_forward.astype('int64'), unit='D')

    # label the resulting dates as midnight utc
    df[ts_field] = (local_dates + days_to_add).tz_localize('utc')
    return df
75