zipline.pipeline.loaders.blaze.BlazeEarningsCalendarLoader.load_adjusted_array() - Code Metrics - Inspection of "MAINT: Move code to shared modules" - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#940)

by Joe

created 2016-01-08 18:21 UTC

load_adjusted_array() B

↳ Parent: zipline.pipeline.loaders.blaze.BlazeEarningsCalendarLoader

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	2
dl	0
loc	29
rs	8.8571

1 Method

Rating	Name	Duplication	Size	Complexity
A	zipline.pipeline.loaders.blaze.BlazeEarningsCalendarLoader.mkseries()	0	7	1

from datashape import istabular
import pandas as pd
from toolz import valmap

from .core import (
    TS_FIELD_NAME,
    SID_FIELD_NAME,
    bind_expression_to_resources,
    ffill_query_in_range,
)
from zipline.pipeline.data import EarningsCalendar
from zipline.pipeline.loaders.base import PipelineLoader
from zipline.pipeline.loaders.earnings import EarningsCalendarLoader


# Name of the field in the input expression that holds the date on which
# earnings will be announced.  It is selected alongside the sid and timestamp
# fields when loading data from the expression.
ANNOUNCEMENT_FIELD_NAME = 'announcement_date'


class BlazeEarningsCalendarLoader(PipelineLoader):
    """Pipeline loader that serves the ``EarningsCalendar`` dataset from a
    blaze expression.

    Parameters
    ----------
    expr : Expr
        Blaze expression describing the announcement records to load.
    resources : dict, optional
        Mapping from the atomic terms of ``expr`` to actual data resources.
    odo_kwargs : dict, optional
        Extra keyword arguments passed along when the expression is
        executed.  ``None`` is treated as an empty dict.
    dataset : optional
        Dataset forwarded to the wrapped ``EarningsCalendarLoader``.
        Defaults to ``EarningsCalendar``.

    Raises
    ------
    ValueError
        If the dshape of ``expr`` is not tabular.

    Notes
    -----
    The expression is expected to have a tabular dshape of::

       Dim * {{
           {SID_FIELD_NAME}: int64,
           {TS_FIELD_NAME}: datetime,
           {ANNOUNCEMENT_FIELD_NAME}: ?datetime,
       }}

    Each row is one record: the sid identifying the company, the timestamp
    at which we learned about the announcement, and the date on which the
    earnings will be announced.

    NOTE(review): earlier documentation stated that when the
    '{TS_FIELD_NAME}' field is not included, the backtest is assumed to start
    with knowledge of all announcements.  The constructor in this class
    selects all three fields from ``expr`` unconditionally, so confirm
    whether omitting '{TS_FIELD_NAME}' is still supported.
    """
    # Fill the field-name placeholders in the docstring above with the
    # actual constant values.
    __doc__ = __doc__.format(
        TS_FIELD_NAME=TS_FIELD_NAME,
        SID_FIELD_NAME=SID_FIELD_NAME,
        ANNOUNCEMENT_FIELD_NAME=ANNOUNCEMENT_FIELD_NAME,
    )

    # Fields selected from the user-supplied expression.
    _expected_fields = frozenset([
        TS_FIELD_NAME,
        SID_FIELD_NAME,
        ANNOUNCEMENT_FIELD_NAME,
    ])

    def __init__(self,
                 expr,
                 resources=None,
                 odo_kwargs=None,
                 dataset=EarningsCalendar):
        """Validate ``expr``, bind it to ``resources`` and store the loader
        configuration.  See the class docstring for the parameters.
        """
        shape = expr.dshape
        if not istabular(shape):
            raise ValueError(
                'expression dshape must be tabular, got: %s' % shape,
            )

        if odo_kwargs is None:
            odo_kwargs = {}

        # Restrict the expression to the expected fields before binding it
        # to its resources.
        selected = expr[list(self._expected_fields)]
        self._expr = bind_expression_to_resources(selected, resources)
        self._odo_kwargs = odo_kwargs
        self._dataset = dataset

    def load_adjusted_array(self, columns, dates, assets, mask):
        """Load ``columns`` by delegating to an ``EarningsCalendarLoader``
        built from the queried expression data.

        The expression is queried through ``ffill_query_in_range`` for the
        span ``dates[0]`` to ``dates[-1]``.  Rows whose sid is not in
        ``assets`` are dropped, and the remaining rows are grouped by sid
        into one ``pd.Series`` per sid (timestamps as the index,
        announcement dates as the values).

        Returns whatever ``EarningsCalendarLoader.load_adjusted_array``
        returns for the same ``columns``, ``dates``, ``assets`` and ``mask``.
        """
        frame = ffill_query_in_range(
            self._expr,
            dates[0],
            dates[-1],
            self._odo_kwargs,
        )

        # Discard, in place, every row belonging to a sid we were not asked
        # about.
        sid_column = frame.loc[:, SID_FIELD_NAME]
        outside_universe = ~sid_column.isin(assets)
        frame.drop(sid_column[outside_universe].index, inplace=True)

        wanted_columns = [TS_FIELD_NAME, ANNOUNCEMENT_FIELD_NAME]

        def announcements_for(row_labels):
            # Column 0 holds the timestamps, column 1 the announcement dates.
            values = frame.loc[row_labels, wanted_columns].values
            return pd.Series(
                values[:, 1],
                index=pd.DatetimeIndex(values[:, 0]),
            )

        groups = frame.groupby(SID_FIELD_NAME).groups
        announcements_by_sid = {
            sid: announcements_for(row_labels)
            for sid, row_labels in groups.items()
        }

        delegate = EarningsCalendarLoader(
            dates,
            announcements_by_sid,
            dataset=self._dataset,
        )
        return delegate.load_adjusted_array(columns, dates, assets, mask)


1			from datashape import istabular
2			import pandas as pd
3			from toolz import valmap
4
5			from .core import (
6			TS_FIELD_NAME,
7			SID_FIELD_NAME,
8			bind_expression_to_resources,
9			ffill_query_in_range,
10			)
11			from zipline.pipeline.data import EarningsCalendar
12			from zipline.pipeline.loaders.base import PipelineLoader
13			from zipline.pipeline.loaders.earnings import EarningsCalendarLoader
14
15
16			ANNOUNCEMENT_FIELD_NAME = 'announcement_date'
17
18
19			class BlazeEarningsCalendarLoader(PipelineLoader):
20			"""A pipeline loader for the ``EarningsCalendar`` dataset that loads
21			data from a blaze expression.
22
23			Parameters
24			----------
25			expr : Expr
26			The expression representing the data to load.
27			resources : dict, optional
28			Mapping from the atomic terms of ``expr`` to actual data resources.
29			odo_kwargs : dict, optional
30			Extra keyword arguments to pass to odo when executing the expression.
31
32			Notes
33			-----
34			The expression should have a tabular dshape of::
35
36			Dim * {{
37			{SID_FIELD_NAME}: int64,
38			{TS_FIELD_NAME}: datetime,
39			{ANNOUNCEMENT_FIELD_NAME}: ?datetime,
40			}}
41
42			Where each row of the table is a record including the sid to identify the
43			company, the timestamp where we learned about the announcement, and the
44			date when the earnings will be announced.
45
46			If the '{TS_FIELD_NAME}' field is not included it is assumed that we
47			start the backtest with knowledge of all announcements.
48			"""
49			__doc__ = __doc__.format(
50			TS_FIELD_NAME=TS_FIELD_NAME,
51			SID_FIELD_NAME=SID_FIELD_NAME,
52			ANNOUNCEMENT_FIELD_NAME=ANNOUNCEMENT_FIELD_NAME,
53			)
54
55			_expected_fields = frozenset({
56			TS_FIELD_NAME,
57			SID_FIELD_NAME,
58			ANNOUNCEMENT_FIELD_NAME,
59			})
60
61			def __init__(self,
62			expr,
63			resources=None,
64			odo_kwargs=None,
65			dataset=EarningsCalendar):
66			dshape = expr.dshape
67
68			if not istabular(dshape):
69			raise ValueError(
70			'expression dshape must be tabular, got: %s' % dshape,
71			)
72
73			expected_fields = self._expected_fields
74			self._expr = bind_expression_to_resources(
75			expr[list(expected_fields)],
76			resources,
77			)
78			self._odo_kwargs = odo_kwargs if odo_kwargs is not None else {}
79			self._dataset = dataset
80
81			def load_adjusted_array(self, columns, dates, assets, mask):
82			raw = ffill_query_in_range(
83			self._expr,
84			dates[0],
85			dates[-1],
86			self._odo_kwargs,
87			)
88			sids = raw.loc[:, SID_FIELD_NAME]
89			raw.drop(
90			sids[~sids.isin(assets)].index,
91			inplace=True
92			)
93
94			gb = raw.groupby(SID_FIELD_NAME)
95
96			def mkseries(idx, raw_loc=raw.loc):
97			vs = raw_loc[
98			idx, [TS_FIELD_NAME, ANNOUNCEMENT_FIELD_NAME]
99			].values
100			return pd.Series(
101			index=pd.DatetimeIndex(vs[:, 0]),
102			data=vs[:, 1],
103			)
104
105			return EarningsCalendarLoader(
106			dates,
107			valmap(mkseries, gb.groups),
108			dataset=self._dataset,
109			).load_adjusted_array(columns, dates, assets, mask)
110

quantopian / zipline

Pull Request — master (#940)

load_adjusted_array() B

Complexity

Size

Duplication

1 Method

Duplication Side-by-Side

Filter issues like