Completed
Pull Request — master (#940)
by Joe
01:34
created

zipline.pipeline.loaders.previous_date_frame()   B

Complexity

Conditions 2

Size

Total Lines 38

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 2
dl 0
loc 38
rs 8.8571
1
import numpy as np
2
import pandas as pd
3
from six import iteritems
4
from six.moves import zip
5
6
from zipline.utils.numpy_utils import np_NaT
7
8
9
def next_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated next earnings dates.

    Parameters
    ----------
    dates : pd.DatetimeIndex
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose index holds the dates on which an
        announcement became known and whose values hold the corresponding
        announcement dates for that sid.

    Returns
    -------
    next_earnings : pd.DataFrame
        A DataFrame with one column per sid.  For each (sid, date) pair the
        value is the earliest announcement date in `announcement_dates[sid]`
        that is on or after `date` and was already known on `date` (i.e. its
        index entry is on or before `date`).  Pairs with no such announcement
        hold `NaT`.

    See Also
    --------
    previous_date_frame
    """
    raw_dates = dates.values
    # One independent NaT-filled buffer per sid, matching the dtype of the
    # raw date values we compare against below.
    cols = {
        equity: np.full(
            len(raw_dates), np.datetime64('NaT'), dtype=raw_dates.dtype
        )
        for equity in announcement_dates
    }

    for equity, earnings_dates in announcement_dates.items():
        data = cols[equity]
        if not earnings_dates.index.is_monotonic_increasing:
            earnings_dates = earnings_dates.sort_index()

        # Iterate over the raw Series values, since we're comparing against
        # numpy arrays anyway.
        iterkv = zip(earnings_dates.index.values, earnings_dates.values)
        for timestamp, announce_date in iterkv:
            # Rows on which this announcement is both known and still ahead.
            date_mask = (timestamp <= raw_dates) & (raw_dates <= announce_date)
            # Only overwrite slots that are still empty or that hold a later
            # announcement.  NaT never compares equal to anything (itself
            # included), so emptiness must be tested with isnull, not `==`.
            value_mask = (announce_date <= data) | pd.isnull(data)
            data[date_mask & value_mask] = announce_date

    return pd.DataFrame(index=dates, data=cols)
51
52
53
def previous_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated previous earnings dates.

    Parameters
    ----------
    dates : pd.DatetimeIndex
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to the dates on which earnings were announced for
        that sid.  Only the `.values` of each entry are used, so a
        DatetimeIndex of announcement dates works as well.

    Returns
    -------
    prev_earnings : pd.DataFrame
        A DataFrame with one column per sid.  For each (sid, date) pair the
        value is the most recent entry in `announcement_dates[sid]` on or
        before `date`.  Dates falling before the first announcement for a sid
        hold `NaT`.  An empty `dates` yields an empty frame.

    See Also
    --------
    next_date_frame
    """
    sids = list(announcement_dates)
    raw_dates = dates.values
    out = np.full(
        (len(raw_dates), len(sids)),
        np.datetime64('NaT', 'ns'),
        dtype='datetime64[ns]',
    )
    for col_idx, sid in enumerate(sids):
        # announcement_dates[sid] is Series mapping knowledge_date to actual
        # announcement date.  We don't care about the knowledge date for
        # computing previous earnings.
        values = announcement_dates[sid].values
        # Drop missing entries and sort so that position k - 1 always holds
        # the latest of the first k announcements.
        values = np.sort(values[~pd.isnull(values)])
        # n_seen[i] is the number of announcements on or before dates[i].
        n_seen = values.searchsorted(raw_dates, side='right')
        has_previous = n_seen > 0
        # Looking the answer up per date (rather than scattering values into
        # rows and forward-filling) keeps the result independent of input
        # order when several announcements fall between two consecutive
        # dates, and needs no special case for an empty `dates`.
        out[has_previous, col_idx] = values[n_seen[has_previous] - 1]

    return pd.DataFrame(out, index=dates, columns=sids)
91