Completed
Push — master ( 1f137d...7a6ba4 )
by Joe
01:31
created

__init__()   B

Complexity

Conditions 4

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 24
rs 8.6846
1
"""
2
Reference implementation for EarningsCalendar loaders.
3
"""
4
from itertools import repeat
5
6
from numpy import full_like, full
7
import pandas as pd
8
from six import iteritems
9
from six.moves import zip
10
from toolz import merge
11
12
from .base import PipelineLoader
13
from .frame import DataFrameLoader
14
from ..data.earnings import EarningsCalendar
15
from zipline.utils.numpy_utils import np_NaT
16
from zipline.utils.memoize import lazyval
17
18
19
class EarningsCalendarLoader(PipelineLoader):
    """
    Reference loader for
    :class:`zipline.pipeline.data.earnings.EarningsCalendar`.

    Does not currently support adjustments to the dates of known earnings.

    Parameters
    ----------
    all_dates : pd.DatetimeIndex
        Index of dates for which we can serve queries.
    announcement_dates : dict[int -> pd.Series or pd.DatetimeIndex]
        Dict mapping sids to objects representing dates on which earnings
        occurred.

        If a dict value is a Series, it's interpreted as a mapping from the
        date on which we learned an announcement was coming to the date on
        which the announcement was made.

        If a dict value is a DatetimeIndex, it's interpreted as just containing
        the dates that announcements were made, and we assume we knew about the
        announcement on all prior dates.  This mode is only supported if
        ``infer_timestamps`` is explicitly passed as a truthy value.

    infer_timestamps : bool, optional
        Whether to allow passing ``DatetimeIndex`` values in
        ``announcement_dates``.
    dataset : optional
        Object whose ``next_announcement`` and ``previous_announcement``
        columns this loader serves.  Defaults to ``EarningsCalendar``.

    Raises
    ------
    ValueError
        If a value of ``announcement_dates`` is a ``DatetimeIndex`` and
        ``infer_timestamps`` is falsy.
    """
    def __init__(self,
                 all_dates,
                 announcement_dates,
                 infer_timestamps=False,
                 dataset=EarningsCalendar):
        self.all_dates = all_dates
        # Shallow-copy so that normalizing values below does not mutate the
        # caller's dict.
        self.announcement_dates = announcement_dates = (
            announcement_dates.copy()
        )
        dates = self.all_dates.values
        # Only existing keys are reassigned inside the loop, so the dict does
        # not change size while it is being iterated.
        for sid, announcements in iteritems(announcement_dates):
            if not isinstance(announcements, pd.DatetimeIndex):
                continue

            if not infer_timestamps:
                # The sid is interpolated into the message so the caller can
                # tell which entry was rejected.
                raise ValueError(
                    "Got DatetimeIndex of announcement dates for sid %d.\n"
                    "Pass `infer_timestamps=True` to use the first date in"
                    " `all_dates` as implicit timestamp." % sid
                )

            # If we are passed a DatetimeIndex, we always have knowledge of
            # the announcements: stamp every one of them with the first
            # queryable date.
            announcement_dates[sid] = pd.Series(
                announcements,
                index=repeat(dates[0], len(announcements)),
            )
        self.dataset = dataset

    def get_loader(self, column):
        """Dispatch to the loader for ``column``.

        Raises
        ------
        ValueError
            If ``column`` is neither ``dataset.next_announcement`` nor
            ``dataset.previous_announcement``.
        """
        if column is self.dataset.next_announcement:
            return self.next_announcement_loader
        elif column is self.dataset.previous_announcement:
            return self.previous_announcement_loader
        else:
            raise ValueError("Don't know how to load column '%s'." % column)

    @lazyval
    def next_announcement_loader(self):
        """DataFrameLoader serving ``dataset.next_announcement``."""
        return DataFrameLoader(
            self.dataset.next_announcement,
            next_earnings_date_frame(
                self.all_dates,
                self.announcement_dates,
            ),
            adjustments=None,
        )

    @lazyval
    def previous_announcement_loader(self):
        """DataFrameLoader serving ``dataset.previous_announcement``."""
        return DataFrameLoader(
            self.dataset.previous_announcement,
            previous_earnings_date_frame(
                self.all_dates,
                self.announcement_dates,
            ),
            adjustments=None,
        )

    def load_adjusted_array(self, columns, dates, assets, mask):
        """Load each requested column with its own loader and merge the
        per-column results into a single mapping.
        """
        return merge(
            self.get_loader(column).load_adjusted_array(
                [column], dates, assets, mask
            )
            for column in columns
        )
111
112
113
def next_earnings_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated next earnings dates.

    Parameters
    ----------
    dates : pd.DatetimeIndex.
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose index holds the dates on which we
        learned of an announcement and whose values hold the dates on which
        the announcement was made.

    Returns
    -------
    next_earnings: pd.DataFrame
        A DataFrame representing, for each (label, date) pair, the earliest
        announcement date in `announcement_dates[label]` that falls on or
        after `date` and that was already known on `date`.  Entries with no
        such announcement have `NaT` as the result in the output.

    See Also
    --------
    previous_earnings_date_frame
    """
    raw_dates = dates.values
    # NaT fill value; the `pd.isnull` check below detects exactly these slots.
    nat = pd.NaT.to_datetime64()

    cols = {}
    for equity, earnings_dates in announcement_dates.items():
        # Allocate with an explicit dtype rather than deriving it from the
        # index object, matching previous_earnings_date_frame.
        data = full(len(raw_dates), nat, dtype='datetime64[ns]')
        if not earnings_dates.index.is_monotonic_increasing:
            earnings_dates = earnings_dates.sort_index()

        # Iterate over the raw Series values, since we're comparing against
        # numpy arrays anyway.
        iterkv = zip(earnings_dates.index.values, earnings_dates.values)
        for timestamp, announce_date in iterkv:
            # Dates on which this announcement is both known and still
            # upcoming (inclusive of the announcement date itself).
            date_mask = (timestamp <= raw_dates) & (raw_dates <= announce_date)
            # Only overwrite empty slots or slots holding a later date.
            # NaT never compares equal to NaT in NumPy, so an `==` test would
            # never find the empty slots; use an explicit null check.
            value_mask = (announce_date <= data) | pd.isnull(data)
            data[date_mask & value_mask] = announce_date
        cols[equity] = data

    return pd.DataFrame(index=dates, data=cols)
153
154
155
def previous_earnings_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated previous earnings dates.

    Parameters
    ----------
    dates : DatetimeIndex.
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose values are the dates on which
        earnings were announced for that sid.  The Series index is ignored.

    Returns
    -------
    prev_earnings: pd.DataFrame
        A DataFrame representing, for each (label, date) pair, the latest
        entry in `announcement_dates[label]` on or before `date`.  Entries
        falling before the first announcement will have `NaT` as the result
        in the output.  Announcements after the last entry of `dates` are
        ignored.

    See Also
    --------
    next_earnings_date_frame
    """
    sids = list(announcement_dates)
    raw_dates = dates.values
    out = full(
        (len(raw_dates), len(sids)),
        pd.NaT.to_datetime64(),
        dtype='datetime64[ns]',
    )

    # With no dates there is nothing to fill (and no last date to compare
    # against); fall through and return the empty frame.
    if len(raw_dates):
        last_date = raw_dates[-1]
        for col_idx, sid in enumerate(sids):
            # announcement_dates[sid] is Series mapping knowledge_date to
            # actual announcement date.  We don't care about the knowledge
            # date for computing previous earnings.
            values = announcement_dates[sid].values
            # Boolean indexing yields a fresh array, so the in-place sort
            # below never touches the caller's data.
            values = values[values <= last_date]
            # Several announcements can land in the same row (e.g. ones made
            # on days absent from `dates`).  Repeated indices keep the last
            # value assigned, so sort ascending to keep the most recent one.
            values.sort()
            # Each announcement is written at the first date on or after it;
            # the forward fill below carries it to the following rows.
            out[raw_dates.searchsorted(values), col_idx] = values

    frame = pd.DataFrame(out, index=dates, columns=sids)
    frame.ffill(inplace=True)
    return frame
193