zipline.pipeline.loaders.next_date_frame() - Code Metrics - Inspection of "MAINT: Move code to shared modules" - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#940)

by Joe

created 2016-01-08 18:11 UTC

zipline.pipeline.loaders.next_date_frame() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	5
dl	0
loc	46
rs	8.1277

import numpy as np
import pandas as pd
from six import iteritems
from six.moves import zip

from zipline.utils.numpy_utils import np_NaT


def next_date_frame(dates, events_by_sid):
    """
    Make a DataFrame representing the simulated next known date for an event.

    Parameters
    ----------
    dates : pd.DatetimeIndex.
        The index of the returned DataFrame.
    events_by_sid : dict[int -> pd.Series]
        Dict mapping sids to a series of dates. Each k:v pair of the series
        represents the date we learned of the event mapping to the date the
        event will occur.

    Returns
    -------
    next_earnings: pd.DataFrame
        A DataFrame where each column is a security from `events_by_sid` where
        the values are the dates of the next known event with the knowledge we
        had on the date of the index. A row is filled only when its date lies
        between a knowledge date and the corresponding event date (both
        inclusive); when several events qualify, the earliest event date is
        used. All other entries are `NaT`.

    See Also
    --------
    previous_date_frame
    """
    raw_dates = dates.values
    cols = {}
    for equity, event_dates in events_by_sid.items():
        # Allocate from the raw datetime64 array (not the pandas Index) so the
        # buffer always has the same dtype as the values it is compared with.
        data = np.full(
            raw_dates.shape,
            np.datetime64('NaT'),
            dtype=raw_dates.dtype,
        )
        cols[equity] = data

        if not event_dates.index.is_monotonic_increasing:
            event_dates = event_dates.sort_index()

        # Iterate over the raw Series values, since we're comparing against
        # numpy arrays anyway.
        iterkv = zip(event_dates.index.values, event_dates.values)
        for knowledge_date, event_date in iterkv:
            # Rows on which this event is known and has not yet happened.
            in_window = (
                (knowledge_date <= raw_dates) &
                (raw_dates <= event_date)
            )
            # NaT never compares equal to anything (itself included) on
            # current NumPy, so unset cells must be detected with an explicit
            # null check rather than `data == NaT`.
            unset = pd.isnull(data)
            # Only overwrite a cell that is empty or holds a later event.
            value_mask = unset | (event_date <= data)
            data[in_window & value_mask] = event_date

    return pd.DataFrame(index=dates, data=cols)


def previous_date_frame(date_index, events_by_sid):
    """
    Make a DataFrame representing the simulated previous date for an event.

    Parameters
    ----------
    date_index : DatetimeIndex.
        The index of the returned DataFrame.
    events_by_sid : dict[int -> pd.Series]
        Dict mapping sids to a series of dates. Each k:v pair of the series
        represents the date we learned of the event mapping to the date the
        event will occur. Only the event dates (the values) are used here.

    Returns
    -------
    prev_earnings: pd.DataFrame
        A DataFrame where each column is a security from `events_by_sid` where
        the values are the dates of the most recent event that occurred on or
        before the date of the index. Entries falling before the first event
        will have `NaT` as the result in the output. Events after the last
        date of `date_index` are ignored.

    See Also
    --------
    next_date_frame
    """
    sids = list(events_by_sid)
    raw_dates = date_index.values
    out = np.full(
        (len(raw_dates), len(sids)),
        np.datetime64('NaT', 'ns'),
        dtype='datetime64[ns]',
    )

    # An empty index has no last date to compare against; the (0, n_sids)
    # frame built below is already the correct answer in that case.
    if len(raw_dates):
        last_date = raw_dates[-1]
        for col_idx, sid in enumerate(sids):
            # events_by_sid[sid] is Series mapping knowledge_date to actual
            # event_date.  We don't care about the knowledge date for
            # computing previous earnings.
            event_dates = events_by_sid[sid].values
            # Sort so that, among events landing on the same row, the most
            # recent one is the last of its run.
            event_dates = np.sort(event_dates[event_dates <= last_date])
            # Row of the first index date on or after each event.
            rows = raw_dates.searchsorted(event_dates)
            # Keep only the last event of each run of equal rows: assigning
            # through repeated fancy indices has no guaranteed order.
            keep = np.ones(len(rows), dtype=bool)
            keep[:-1] = rows[1:] != rows[:-1]
            out[rows[keep], col_idx] = event_dates[keep]

    frame = pd.DataFrame(out, index=date_index, columns=sids)
    # Carry each event date forward until the next event replaces it.
    frame.ffill(inplace=True)
    return frame


1			import numpy as np
2			import pandas as pd
3			from six import iteritems
4			from six.moves import zip
5
6			from zipline.utils.numpy_utils import np_NaT
7
8
9			def next_date_frame(dates, events_by_sid):
10			"""
11			Make a DataFrame representing the simulated next known date for an event.
12
13			Parameters
14			----------
15			dates : pd.DatetimeIndex.
16			The index of the returned DataFrame.
17			events_by_sid : dict[int -> pd.Series]
18			Dict mapping sids to a series of dates. Each k:v pair of the series
19			represents the date we learned of the event mapping to the date the
20			event will occur.
21			Returns
22			-------
23			next_earnings: pd.DataFrame
24			A DataFrame where each column is a security from `events_by_sid` where
25			the values are the dates of the next known event with the knowledge we
26			had on the date of the index. Entries falling after the last date will
27			have `NaT` as the result in the output.
28
29
30			See Also
31			--------
32			previous_date_frame
33			"""
34			cols = {
35			equity: np.full_like(dates, np_NaT) for equity in events_by_sid
36			}
37			raw_dates = dates.values
38			for equity, event_dates in iteritems(events_by_sid):
39			data = cols[equity]
40			if not event_dates.index.is_monotonic_increasing:
41			event_dates = event_dates.sort_index()
42
43			# Iterate over the raw Series values, since we're comparing against
44			# numpy arrays anyway.
45			iterkv = zip(event_dates.index.values, event_dates.values)
46			for knowledge_date, event_date in iterkv:
47			date_mask = (
48			(knowledge_date <= raw_dates) &
49			(raw_dates <= event_date)
50			)
51			value_mask = (event_date <= data) | (data == np_NaT)
52			data[date_mask & value_mask] = event_date
53
54			return pd.DataFrame(index=dates, data=cols)
55
56
57			def previous_date_frame(date_index, events_by_sid):
58			"""
59			Make a DataFrame representing simulated next earnings date_index.
60
61			Parameters
62			----------
63			date_index : DatetimeIndex.
64			The index of the returned DataFrame.
65			events_by_sid : dict[int -> DatetimeIndex]
66			Dict mapping sids to a series of dates. Each k:v pair of the series
67			represents the date we learned of the event mapping to the date the
68			event will occur.
69
70			Returns
71			-------
72			prev_earnings: pd.DataFrame
73			A DataFrame where each column is a security from `events_by_sid` where
74			the values are the dates of the previous event that occured on the date
75			of the index. Entries falling before the first date will have `NaT` as
76			the result in the output.
77
78			See Also
79			--------
80			next_date_frame
81			"""
82			sids = list(events_by_sid)
83			out = np.full((len(date_index), len(sids)), np_NaT, dtype='datetime64[ns]')
84			dn = date_index[-1].asm8
85			for col_idx, sid in enumerate(sids):
86			# events_by_sid[sid] is Series mapping knowledge_date to actual
87			# event_date. We don't care about the knowledge date for
88			# computing previous earnings.
89			values = events_by_sid[sid].values
90			values = values[values <= dn]
91			out[date_index.searchsorted(values), col_idx] = values
92
93			frame = pd.DataFrame(out, index=date_index, columns=sids)
94			frame.ffill(inplace=True)
95			return frame
96

quantopian / zipline

Pull Request — master (#940)

zipline.pipeline.loaders.next_date_frame() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like