Completed
Pull Request — master (#947)
by Joe
01:49
created

zipline.pipeline.loaders.normalize_data_query_time()   A

Complexity

Conditions 1

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 23
rs 9.0857
1
import datetime
2
3
import pandas as pd
4
5
6
def normalize_data_query_time(dt, time, tz):
    """Build the UTC query timestamp for a given date.

    The calendar date of ``dt`` is paired with the wall-clock ``time``,
    interpreted in ``tz``, and the result is expressed in UTC.

    Parameters
    ----------
    dt : pd.Timestamp
        The datetime whose date component is used.
    time : datetime.time
        The time of day to query before.
    tz : tzinfo
        The timezone in which ``time`` is expressed.

    Returns
    -------
    query_dt : pd.Timestamp
        ``dt``'s date at ``time`` in ``tz``, converted to utc.
    """
    # naive wall-clock datetime: the date from ``dt``, the time from ``time``
    wall_clock = datetime.datetime.combine(dt.date(), time)
    # attach the requested timezone to the wall-clock value ...
    localized = pd.Timestamp(wall_clock, tz=tz)
    # ... and report the same instant in utc
    return localized.tz_convert('utc')
29
30
31
def normalize_timestamp_to_query_time(df,
                                      time,
                                      tz,
                                      inplace=False,
                                      ts_field='timestamp'):
    """Update the timestamp field of a dataframe to normalize dates around
    some data query time/timezone.

    Every timestamp is replaced by midnight utc of the calendar date, as
    observed in ``tz``, on which it falls. Timestamps whose local
    wall-clock time is strictly after ``time`` are labelled with the
    following calendar date instead.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to update. This needs a column named ``ts_field``.
        The values of that column are interpreted as utc.
    time : datetime.time
        The time to query before.
    tz : tzinfo
        The timezone the time applies to.
    inplace : bool, optional
        Update the dataframe in place.
    ts_field : str, optional
        The name of the timestamp field in ``df``.

    Returns
    -------
    df : pd.DataFrame
        The dataframe with the timestamp field normalized. If ``inplace`` is
        true, then this will be the same object as ``df`` otherwise this will
        be a copy.
    """
    if not inplace:
        # don't mutate the dataframe in place
        df = df.copy()

    dtidx = pd.DatetimeIndex(df.loc[:, ts_field], tz='utc')
    dtidx_local_time = dtidx.tz_convert(tz)
    # rows observed after the query time belong to the next calendar date
    to_roll_forward = dtidx_local_time.time > time

    # Drop the timezone *before* doing any date arithmetic. Adding one day
    # to tz-aware values adds 24 absolute hours, which skips a whole
    # calendar date on the evening before a DST spring-forward. Working on
    # the local calendar date for every row also keeps the rows that are
    # not rolled forward consistent with the ones that are (the utc date
    # and the local date differ for part of every day when tz != utc).
    local_dates = dtidx_local_time.tz_localize(None).normalize()
    normalized = local_dates.where(
        ~to_roll_forward,
        local_dates + datetime.timedelta(days=1),
    )

    # label the resulting dates as midnight utc; assigning the whole column
    # at once avoids mixing naive and tz-aware values in a partial update
    df[ts_field] = normalized.tz_localize('utc')
    return df
73