zipline.pipeline.loaders.next_date_frame() - Code Metrics - Inspection of "MAINT: Move code to shared modules" - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#940)

by Joe

created 2016-01-05 19:32 UTC

zipline.pipeline.loaders.next_date_frame() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	5
dl	0
loc	42
rs	8.0896

import numpy as np
import pandas as pd
from six import iteritems
from six.moves import zip

from zipline.utils.numpy_utils import np_NaT


def next_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated next earnings dates.

    Parameters
    ----------
    dates : pd.DatetimeIndex.
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose index holds the dates on which
        an announcement became known and whose values hold the matching
        announcement dates for that sid.

    Returns
    -------
    next_earnings: pd.DataFrame
        A DataFrame indexed by `dates` with one column per sid.  Each
        (date, sid) cell holds the earliest announcement date in
        `announcement_dates[sid]` that is on or after `date` and whose
        index entry is on or before `date`.  Cells with no such
        announcement hold `NaT`.

    See Also
    --------
    previous_date_frame
    """
    raw_dates = dates.values
    cols = {}
    for equity, earnings_dates in announcement_dates.items():
        # Allocate from the raw datetime64 values rather than from the index
        # object, so the buffer dtype always matches `raw_dates`.
        data = np.full(
            raw_dates.shape, np.datetime64('NaT'), dtype=raw_dates.dtype
        )
        if not earnings_dates.index.is_monotonic_increasing:
            earnings_dates = earnings_dates.sort_index()

        # Iterate over the raw Series values, since we're comparing against
        # numpy arrays anyway.
        iterkv = zip(earnings_dates.index.values, earnings_dates.values)
        for timestamp, announce_date in iterkv:
            date_mask = (timestamp <= raw_dates) & (raw_dates <= announce_date)
            # `data == NaT` is never true on NumPy versions where NaT
            # compares unequal to everything (itself included), which would
            # leave every cell unfilled.  pd.isnull detects NaT regardless.
            value_mask = (announce_date <= data) | pd.isnull(data)
            data[date_mask & value_mask] = announce_date
        cols[equity] = data

    return pd.DataFrame(index=dates, data=cols)


def previous_date_frame(dates, announcement_dates):
    """
    Make a DataFrame representing simulated previous earnings dates.

    Parameters
    ----------
    dates : DatetimeIndex.
        The index of the returned DataFrame.
    announcement_dates : dict[int -> pd.Series]
        Dict mapping sids to a Series whose values are the dates on which
        earnings were announced for that sid.  The Series index is ignored.

    Returns
    -------
    prev_earnings: pd.DataFrame
        A DataFrame indexed by `dates` with one column per sid.  Each
        (date, sid) cell holds the latest entry in the values of
        `announcement_dates[sid]` that is on or before `date`.  Cells for
        dates before the first announcement hold `NaT`.

    See Also
    --------
    next_date_frame
    """
    sids = list(announcement_dates)
    out = np.full(
        (len(dates), len(sids)),
        np.datetime64('NaT', 'ns'),
        dtype='datetime64[ns]',
    )
    raw_dates = dates.values
    for col_idx, sid in enumerate(sids):
        # announcement_dates[sid] is Series mapping knowledge_date to actual
        # announcement date.  We don't care about the knowledge date for
        # computing previous earnings.
        values = announcement_dates[sid].values
        # Drop missing entries and sort, so the lookup below does not depend
        # on the order in which announcements were supplied.
        values = np.sort(values[~pd.isnull(values)])

        # For each date, find the position of the last announcement that is
        # <= that date; -1 means no announcement has happened yet.  Looking
        # up per date (rather than scattering announcements into rows and
        # forward-filling) avoids writing the same row more than once when
        # several announcements fall between two consecutive dates.
        last_seen = values.searchsorted(raw_dates, side='right') - 1
        known = last_seen >= 0
        out[known, col_idx] = values[last_seen[known]]

    return pd.DataFrame(out, index=dates, columns=sids)


1			import numpy as np
2			import pandas as pd
3			from six import iteritems
4			from six.moves import zip
5
6			from zipline.utils.numpy_utils import np_NaT
7
8
9			def next_date_frame(dates, announcement_dates):
10			"""
11			Make a DataFrame representing simulated next earnings dates.
12
13			Parameters
14			----------
15			dates : pd.DatetimeIndex.
16			The index of the returned DataFrame.
17			announcement_dates : dict[int -> pd.Series]
18			Dict mapping sids to an index of dates on which earnings were announced
19			for that sid.
20
21			Returns
22			-------
23			next_earnings: pd.DataFrame
24			A DataFrame representing, for each (label, date) pair, the first entry
25			in `earnings_calendars[label]` on or after `date`. Entries falling
26			after the last date in a calendar will have `np_NaT` as the result in
27			the output.
28
29			See Also
30			--------
31			previous_earnings_date_frame
32			"""
33			cols = {
34			equity: np.full_like(dates, np_NaT) for equity in announcement_dates
35			}
36			raw_dates = dates.values
37			for equity, earnings_dates in iteritems(announcement_dates):
38			data = cols[equity]
39			if not earnings_dates.index.is_monotonic_increasing:
40			earnings_dates = earnings_dates.sort_index()
41
42			# Iterate over the raw Series values, since we're comparing against
43			# numpy arrays anyway.
44			iterkv = zip(earnings_dates.index.values, earnings_dates.values)
45			for timestamp, announce_date in iterkv:
46			date_mask = (timestamp <= raw_dates) & (raw_dates <= announce_date)
47			value_mask = (announce_date <= data) | (data == np_NaT)
48			data[date_mask & value_mask] = announce_date
49
50			return pd.DataFrame(index=dates, data=cols)
51
52
53			def previous_date_frame(dates, announcement_dates):
54			"""
55			Make a DataFrame representing simulated next earnings dates.
56
57			Parameters
58			----------
59			dates : DatetimeIndex.
60			The index of the returned DataFrame.
61			announcement_dates : dict[int -> DatetimeIndex]
62			Dict mapping sids to an index of dates on which earnings were announced
63			for that sid.
64
65			Returns
66			-------
67			prev_earnings: pd.DataFrame
68			A DataFrame representing, for (label, date) pair, the first entry in
69			`announcement_dates[label]` strictly before `date`. Entries falling
70			before the first date in a calendar will have `NaT` as the result in
71			the output.
72
73			See Also
74			--------
75			next_earnings_date_frame
76			"""
77			sids = list(announcement_dates)
78			out = np.full((len(dates), len(sids)), np_NaT, dtype='datetime64[ns]')
79			dn = dates[-1].asm8
80			for col_idx, sid in enumerate(sids):
81			# announcement_dates[sid] is Series mapping knowledge_date to actual
82			# announcement date. We don't care about the knowledge date for
83			# computing previous earnings.
84			values = announcement_dates[sid].values
85			values = values[values <= dn]
86			out[dates.searchsorted(values), col_idx] = values
87
88			frame = pd.DataFrame(out, index=dates, columns=sids)
89			frame.ffill(inplace=True)
90			return frame
91

quantopian / zipline

Pull Request — master (#940)

zipline.pipeline.loaders.next_date_frame() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like