Completed
Push — master ( 5c3ca1...d3d362 )
by Joe
01:27
created

zipline.pipeline.loaders.next_date_frame()   B

Complexity

Conditions 5

Size

Total Lines 46

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 5
dl 0
loc 46
rs 8.1277
1
import numpy as np
2
import pandas as pd
3
from six import iteritems
4
from six.moves import zip
5
6
from zipline.utils.numpy_utils import np_NaT
7
8
9
def next_date_frame(dates, events_by_sid):
    """
    Make a DataFrame representing the simulated next known date for an event.

    Parameters
    ----------
    dates : pd.DatetimeIndex.
        The index of the returned DataFrame.
    events_by_sid : dict[int -> pd.Series]
        Dict mapping sids to a series of dates. Each k:v pair of the series
        represents the date we learned of the event mapping to the date the
        event will occur.

    Returns
    -------
    next_events: pd.DataFrame
        A DataFrame where each column is a security from `events_by_sid` where
        the values are the dates of the next known event with the knowledge we
        had on the date of the index. Rows for which no already-announced,
        not-yet-passed event exists are `NaT`.

    See Also
    --------
    previous_date_frame
    """
    # NaT sentinel with the same unit as the per-sid output buffers below.
    nat = np.datetime64('NaT', 'ns')
    raw_dates = dates.values
    cols = {}
    for equity in events_by_sid:
        event_dates = events_by_sid[equity]
        # Allocate with an explicit dtype instead of inheriting it from
        # whatever `dates` converts to.
        data = np.full(len(raw_dates), nat, dtype='datetime64[ns]')
        if not event_dates.index.is_monotonic_increasing:
            event_dates = event_dates.sort_index()

        # Iterate over the raw Series values, since we're comparing against
        # numpy arrays anyway.
        iterkv = zip(event_dates.index.values, event_dates.values)
        for knowledge_date, event_date in iterkv:
            # Rows on which the event is already known and has not passed yet.
            date_mask = (
                (knowledge_date <= raw_dates) &
                (raw_dates <= event_date)
            )
            # Only fill empty slots or slots holding a later event.  NaT must
            # be detected with `isnat`: every ordinary comparison against NaT
            # (including `== NaT`) evaluates to False on current numpy.
            value_mask = np.isnat(data) | (event_date <= data)
            data[date_mask & value_mask] = event_date
        cols[equity] = data

    return pd.DataFrame(index=dates, data=cols)
55
56
57
def previous_date_frame(date_index, events_by_sid):
    """
    Make a DataFrame representing the most recent past event date per sid.

    Parameters
    ----------
    date_index : DatetimeIndex.
        The index of the returned DataFrame. It is searched with
        ``searchsorted``, so it is assumed to be sorted ascending.
    events_by_sid : dict[int -> pd.Series]
        Dict mapping sids to a series of dates. Each k:v pair of the series
        represents the date we learned of the event mapping to the date the
        event will occur. Only the values (event dates) are used here.

    Returns
    -------
    previous_events: pd.DataFrame
        A DataFrame where each column is a security from `events_by_sid` where
        the values are the dates of the latest event that occurred on or
        before the date of the index. Rows preceding the first recorded event
        have `NaT` as the result in the output. An empty `date_index` yields
        an empty frame.

    See Also
    --------
    next_date_frame
    """
    sids = list(events_by_sid)
    # NaT sentinel with the same unit as the output buffer.
    nat = np.datetime64('NaT', 'ns')
    out = np.full((len(date_index), len(sids)), nat, dtype='datetime64[ns]')
    if not len(date_index):
        # Nothing to fill, and there is no last date to bound events with.
        return pd.DataFrame(out, index=date_index, columns=sids)

    raw_dates = date_index.values
    dn = raw_dates[-1]
    for col_idx, sid in enumerate(sids):
        # events_by_sid[sid] is Series mapping knowledge_date to actual
        # event_date.  We don't care about the knowledge date for
        # computing previous events.
        values = events_by_sid[sid].values
        # Drop events after the last index date (they would index past the
        # end of `out`) and order the rest chronologically.
        values = np.sort(values[values <= dn])
        rows = raw_dates.searchsorted(values)
        # Several events can land on the same row (e.g. events between two
        # consecutive index dates).  Keep only the latest one per row rather
        # than relying on the write order of a repeated-index assignment.
        keep = np.ones(len(rows), dtype=bool)
        keep[:-1] = rows[1:] != rows[:-1]
        out[rows[keep], col_idx] = values[keep]

    frame = pd.DataFrame(out, index=date_index, columns=sids)
    # Carry each event date forward until the next event replaces it.
    frame.ffill(inplace=True)
    return frame
96