zipline.pipeline.loaders.EarningsCalendarLoader.get_loader() - Code Metrics - Inspection of "Merge pull request #924 from quantopian/dataset-su..." - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 1f137d...7a6ba4 )

by Joe

created 2015-12-29 16:44 UTC

get_loader() A

↳ Parent: zipline.pipeline.loaders.EarningsCalendarLoader

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	3
dl	0
loc	9
rs	9.6667

"""
Reference implementation for EarningsCalendar loaders.
"""
from itertools import repeat

from numpy import full_like, full
import pandas as pd
from six import iteritems
from six.moves import zip
from toolz import merge

from .base import PipelineLoader
from .frame import DataFrameLoader
from ..data.earnings import EarningsCalendar
from zipline.utils.numpy_utils import np_NaT
from zipline.utils.memoize import lazyval


class EarningsCalendarLoader(PipelineLoader):
    """
    Reference loader for
    :class:`zipline.pipeline.data.earnings.EarningsCalendar`.

    Does not currently support adjustments to the dates of known earnings.

    Parameters
    ----------
    all_dates : pd.DatetimeIndex
        Index of dates for which we can serve queries.
    announcement_dates : dict[int -> pd.Series or pd.DatetimeIndex]
        Dict mapping sids to objects representing dates on which earnings
        occurred.  The dict is shallow-copied, so the caller's mapping is
        never modified.

        If a dict value is a Series, it's interpreted as a mapping from the
        date on which we learned an announcement was coming to the date on
        which the announcement was made.

        If a dict value is a DatetimeIndex, it's interpreted as just containing
        the dates that announcements were made, and we assume we knew about the
        announcement on all prior dates.  This mode is only supported if
        ``infer_timestamps`` is explicitly passed as a truthy value.

    infer_timestamps : bool, optional
        Whether to allow passing ``DatetimeIndex`` values in
        ``announcement_dates``.
    dataset : optional
        Object whose ``next_announcement`` and ``previous_announcement``
        attributes are the columns served by this loader.  Defaults to
        ``EarningsCalendar``.

    Raises
    ------
    ValueError
        If a value of ``announcement_dates`` is a ``DatetimeIndex`` and
        ``infer_timestamps`` is falsy.
    """
    def __init__(self,
                 all_dates,
                 announcement_dates,
                 infer_timestamps=False,
                 dataset=EarningsCalendar):
        self.all_dates = all_dates
        # Work on a shallow copy so the normalization below does not leak
        # back into the caller's dict.
        self.announcement_dates = announcement_dates = (
            announcement_dates.copy()
        )
        dates = self.all_dates.values
        # Only values of existing keys are replaced inside this loop, so the
        # dict never changes size while it is being iterated.
        for k, v in iteritems(announcement_dates):
            if isinstance(v, pd.DatetimeIndex):
                if not infer_timestamps:
                    # Interpolate the offending sid into the message; the
                    # placeholder used to be emitted verbatim.
                    raise ValueError(
                        (
                            "Got DatetimeIndex of announcement dates for sid"
                            " %s.\n"
                            "Pass `infer_timestamps=True` to use the first"
                            " date in `all_dates` as implicit timestamp."
                        ) % (k,)
                    )
                # If we are passed a DatetimeIndex, we always have
                # knowledge of the announcements: every announcement is
                # stamped with the first date we can serve.
                announcement_dates[k] = pd.Series(
                    v, index=repeat(dates[0], len(v)),
                )
        self.dataset = dataset

    def get_loader(self, column):
        """Dispatch to the loader for ``column``.

        Columns are matched by identity against the ``next_announcement``
        and ``previous_announcement`` attributes of ``self.dataset``.

        Raises
        ------
        ValueError
            If ``column`` is neither of the two supported columns.
        """
        if column is self.dataset.next_announcement:
            return self.next_announcement_loader
        elif column is self.dataset.previous_announcement:
            return self.previous_announcement_loader
        else:
            raise ValueError("Don't know how to load column '%s'." % column)

    @lazyval
    def next_announcement_loader(self):
        """DataFrameLoader over the frame of next announcement dates."""
        return DataFrameLoader(
            self.dataset.next_announcement,
            next_earnings_date_frame(
                self.all_dates,
                self.announcement_dates,
            ),
            adjustments=None,
        )

    @lazyval
    def previous_announcement_loader(self):
        """DataFrameLoader over the frame of previous announcement dates."""
        return DataFrameLoader(
            self.dataset.previous_announcement,
            previous_earnings_date_frame(
                self.all_dates,
                self.announcement_dates,
            ),
            adjustments=None,
        )

    def load_adjusted_array(self, columns, dates, assets, mask):
        """
        Load every column in ``columns`` through its dedicated loader.

        Each column is loaded on its own via ``get_loader`` and the
        per-column results are combined with ``toolz.merge``.
        """
        return merge(
            self.get_loader(column).load_adjusted_array(
                [column], dates, assets, mask
            )
            for column in columns
        )


def next_earnings_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated next earnings dates.

    Parameters
    ----------
    dates : pd.DatetimeIndex.
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose index holds the date on which an
        announcement became known and whose values hold the date on which
        the announcement was made.

    Returns
    -------
    next_earnings: pd.DataFrame
        A DataFrame representing, for each (label, date) pair, the earliest
        entry in `announcement_dates[label]` that is on or after `date` and
        was already known on `date`.  Pairs with no such entry will have
        `np_NaT` as the result in the output.

    See Also
    --------
    previous_earnings_date_frame
    """
    raw_dates = dates.values
    # Allocate from the raw datetime64 values so every column is a plain
    # ndarray with the same dtype as the values it is compared against.
    cols = {
        equity: full_like(raw_dates, np_NaT) for equity in announcement_dates
    }
    for equity, earnings_dates in iteritems(announcement_dates):
        data = cols[equity]
        if not earnings_dates.index.is_monotonic_increasing:
            earnings_dates = earnings_dates.sort_index()

        # Iterate over the raw Series values, since we're comparing against
        # numpy arrays anyway.
        iterkv = zip(earnings_dates.index.values, earnings_dates.values)
        for timestamp, announce_date in iterkv:
            # Rows between learning of the announcement and the announcement
            # itself, inclusive on both ends.
            date_mask = (timestamp <= raw_dates) & (raw_dates <= announce_date)
            # Slots that are still empty.  Newer numpy releases evaluate
            # ``NaT == NaT`` as False, so the equality test alone never
            # matches there; NaT is then the only value unequal to itself.
            # Older releases match on the equality test instead.
            unset = (data == np_NaT) | (data != data)
            # Keep the earliest upcoming announcement for each row.
            value_mask = (announce_date <= data) | unset
            data[date_mask & value_mask] = announce_date

    return pd.DataFrame(index=dates, data=cols)


def previous_earnings_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated previous earnings dates.

    Parameters
    ----------
    dates : DatetimeIndex.
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose values are the dates on which
        earnings were announced for that sid.  The Series index (the date on
        which the announcement became known) is ignored here.

    Returns
    -------
    prev_earnings: pd.DataFrame
        A DataFrame representing, for each (label, date) pair, the latest
        entry in `announcement_dates[label]` on or before `date`.  Entries
        falling before the first announcement will have `NaT` as the result
        in the output.

    See Also
    --------
    next_earnings_date_frame
    """
    sids = list(announcement_dates)
    out = full((len(dates), len(sids)), np_NaT, dtype='datetime64[ns]')
    # With no dates there is no last date to compare against and nothing to
    # fill, so skip straight to building the (empty) frame.
    if len(dates):
        raw_dates = dates.values
        dn = raw_dates[-1]
        for col_idx, sid in enumerate(sids):
            # announcement_dates[sid] is Series mapping knowledge_date to
            # actual announcement date.  We don't care about the knowledge
            # date for computing previous earnings.
            values = announcement_dates[sid].values
            # Boolean indexing returns a fresh array, so the in-place sort
            # below cannot touch the caller's data.
            values = values[values <= dn]
            # Several announcements can land on the same row (e.g. two
            # announcements between consecutive entries of ``dates``).
            # Sorting makes the assignment below keep the latest of them
            # rather than whichever happened to come last in the input.
            values.sort()
            # side='left': each announcement is written to the first row
            # whose date is on or after it.
            out[raw_dates.searchsorted(values), col_idx] = values

    frame = pd.DataFrame(out, index=dates, columns=sids)
    # Carry each announcement forward until the next one replaces it.
    frame.ffill(inplace=True)
    return frame


1			"""
2			Reference implementation for EarningsCalendar loaders.
3			"""
4			from itertools import repeat
5
6			from numpy import full_like, full
7			import pandas as pd
8			from six import iteritems
9			from six.moves import zip
10			from toolz import merge
11
12			from .base import PipelineLoader
13			from .frame import DataFrameLoader
14			from ..data.earnings import EarningsCalendar
15			from zipline.utils.numpy_utils import np_NaT
16			from zipline.utils.memoize import lazyval
17
18
19			class EarningsCalendarLoader(PipelineLoader):
20			"""
21			Reference loader for
22			:class:`zipline.pipeline.data.earnings.EarningsCalendar`.
23
24			Does not currently support adjustments to the dates of known earnings.
25
26			Parameters
27			----------
28			all_dates : pd.DatetimeIndex
29			Index of dates for which we can serve queries.
30			announcement_dates : dict[int -> pd.Series or pd.DatetimeIndex]
31			Dict mapping sids to objects representing dates on which earnings
32			occurred.
33
34			If a dict value is a Series, it's interpreted as a mapping from the
35			date on which we learned an announcement was coming to the date on
36			which the announcement was made.
37
38			If a dict value is a DatetimeIndex, it's interpreted as just containing
39			the dates that announcements were made, and we assume we knew about the
40			announcement on all prior dates. This mode is only supported if
41			``infer_timestamp`` is explicitly passed as a truthy value.
42
43			infer_timestamps : bool, optional
44			Whether to allow passing ``DatetimeIndex`` values in
45			``announcement_dates``.
46			"""
47			def __init__(self,
48			all_dates,
49			announcement_dates,
50			infer_timestamps=False,
51			dataset=EarningsCalendar):
52			self.all_dates = all_dates
53			self.announcement_dates = announcement_dates = (
54			announcement_dates.copy()
55			)
56			dates = self.all_dates.values
57			for k, v in iteritems(announcement_dates):
58			if isinstance(v, pd.DatetimeIndex):
59			if not infer_timestamps:
60			raise ValueError(
61			"Got DatetimeIndex of announcement dates for sid %d.\n"
62			"Pass `infer_timestamps=True` to use the first date in"
63			" `all_dates` as implicit timestamp."
64			)
65			# If we are passed a DatetimeIndex, we always have
66			# knowledge of the announcements.
67			announcement_dates[k] = pd.Series(
68			v, index=repeat(dates[0], len(v)),
69			)
70			self.dataset = dataset
71
72			def get_loader(self, column):
73			"""Dispatch to the loader for ``column``.
74			"""
75			if column is self.dataset.next_announcement:
76			return self.next_announcement_loader
77			elif column is self.dataset.previous_announcement:
78			return self.previous_announcement_loader
79			else:
80			raise ValueError("Don't know how to load column '%s'." % column)
81
82			@lazyval
83			def next_announcement_loader(self):
84			return DataFrameLoader(
85			self.dataset.next_announcement,
86			next_earnings_date_frame(
87			self.all_dates,
88			self.announcement_dates,
89			),
90			adjustments=None,
91			)
92
93			@lazyval
94			def previous_announcement_loader(self):
95			return DataFrameLoader(
96			self.dataset.previous_announcement,
97			previous_earnings_date_frame(
98			self.all_dates,
99			self.announcement_dates,
100			),
101			adjustments=None,
102			)
103
104			def load_adjusted_array(self, columns, dates, assets, mask):
105			return merge(
106			self.get_loader(column).load_adjusted_array(
107			[column], dates, assets, mask
108			)
109			for column in columns
110			)
111
112
113			def next_earnings_date_frame(dates, announcement_dates):
114			"""
115			Make a DataFrame representing simulated next earnings dates.
116
117			Parameters
118			----------
119			dates : pd.DatetimeIndex.
120			The index of the returned DataFrame.
121			announcement_dates : dict[int -> pd.Series]
122			Dict mapping sids to an index of dates on which earnings were announced
123			for that sid.
124
125			Returns
126			-------
127			next_earnings: pd.DataFrame
128			A DataFrame representing, for each (label, date) pair, the first entry
129			in `earnings_calendars[label]` on or after `date`. Entries falling
130			after the last date in a calendar will have `np_NaT` as the result in
131			the output.
132
133			See Also
134			--------
135			previous_earnings_date_frame
136			"""
137			cols = {equity: full_like(dates, np_NaT) for equity in announcement_dates}
138			raw_dates = dates.values
139			for equity, earnings_dates in iteritems(announcement_dates):
140			data = cols[equity]
141			if not earnings_dates.index.is_monotonic_increasing:
142			earnings_dates = earnings_dates.sort_index()
143
144			# Iterate over the raw Series values, since we're comparing against
145			# numpy arrays anyway.
146			iterkv = zip(earnings_dates.index.values, earnings_dates.values)
147			for timestamp, announce_date in iterkv:
148			date_mask = (timestamp <= raw_dates) & (raw_dates <= announce_date)
149			value_mask = (announce_date <= data) | (data == np_NaT)
150			data[date_mask & value_mask] = announce_date
151
152			return pd.DataFrame(index=dates, data=cols)
153
154
155			def previous_earnings_date_frame(dates, announcement_dates):
156			"""
157			Make a DataFrame representing simulated next earnings dates.
158
159			Parameters
160			----------
161			dates : DatetimeIndex.
162			The index of the returned DataFrame.
163			announcement_dates : dict[int -> DatetimeIndex]
164			Dict mapping sids to an index of dates on which earnings were announced
165			for that sid.
166
167			Returns
168			-------
169			prev_earnings: pd.DataFrame
170			A DataFrame representing, for (label, date) pair, the first entry in
171			`announcement_dates[label]` strictly before `date`. Entries falling
172			before the first date in a calendar will have `NaT` as the result in
173			the output.
174
175			See Also
176			--------
177			next_earnings_date_frame
178			"""
179			sids = list(announcement_dates)
180			out = full((len(dates), len(sids)), np_NaT, dtype='datetime64[ns]')
181			dn = dates[-1].asm8
182			for col_idx, sid in enumerate(sids):
183			# announcement_dates[sid] is Series mapping knowledge_date to actual
184			# announcement date. We don't care about the knowledge date for
185			# computing previous earnings.
186			values = announcement_dates[sid].values
187			values = values[values <= dn]
188			out[dates.searchsorted(values), col_idx] = values
189
190			frame = pd.DataFrame(out, index=dates, columns=sids)
191			frame.ffill(inplace=True)
192			return frame
193

quantopian / zipline

Push — master ( 1f137d...7a6ba4 )

get_loader() A

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like