zipline.pipeline.loaders.USEquityPricingLoader.from_files() - Code Metrics - Inspection of "Interactive conveniences" - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#836)

unknown

created 2015-11-21 01:17 UTC

from_files() A

↳ Parent: zipline.pipeline.loaders.USEquityPricingLoader

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	1
dl	0
loc	16
rs	9.4286

# Copyright 2015 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from numpy import (
    iinfo,
    uint32,
)

from zipline.data.us_equity_pricing import (
    BcolzDailyBarReader,
    SQLiteAdjustmentReader,
)
from zipline.lib.adjusted_array import (
    adjusted_array,
)
from zipline.errors import NoFurtherDataError

from .base import PipelineLoader

UINT32_MAX = iinfo(uint32).max


class USEquityPricingLoader(PipelineLoader):
    """
    PipelineLoader that serves US equity pricing data.

    Baseline values are read through ``raw_price_loader`` and adjustments
    through ``adjustments_loader``; this class only combines their results.

    Parameters
    ----------
    raw_price_loader : object
        Must provide ``load_raw_arrays(columns, start_date, end_date,
        assets)`` and a ``_calendar`` attribute listing the known dates.
    adjustments_loader : object
        Must provide ``load_adjustments(columns, dates, assets)``.
    """

    def __init__(self, raw_price_loader, adjustments_loader):
        self.raw_price_loader = raw_price_loader
        # HACK: the calendar is borrowed from the raw price loader because
        # query dates have to be moved backwards before baselines are read.
        self._calendar = self.raw_price_loader._calendar
        self.adjustments_loader = adjustments_loader

    @classmethod
    def from_files(cls, pricing_path, adjustments_path):
        """
        Build a loader from a bcolz equity pricing directory and a SQLite
        adjustments database.

        Parameters
        ----------
        pricing_path : str
            Path to a bcolz directory written by a BcolzDailyBarWriter.
        adjustments_path : str
            Path to an adjustments db written by a SQLiteAdjustmentWriter.

        Returns
        -------
        loader : cls
            An instance wrapping a BcolzDailyBarReader and a
            SQLiteAdjustmentReader opened on the given paths.
        """
        bar_reader = BcolzDailyBarReader(pricing_path)
        adjustment_reader = SQLiteAdjustmentReader(adjustments_path)
        return cls(bar_reader, adjustment_reader)

    def load_adjusted_array(self, columns, dates, assets, mask):
        """
        Load one adjusted array per requested column.

        Parameters
        ----------
        columns : sequence
            Columns to load; they become the keys of the result.
        dates : sequence
            Dates on which the data will be shown to the user's algo.
        assets : sequence
            Assets to load data for.
        mask : object
            Passed unchanged to ``adjusted_array`` for every column.

        Returns
        -------
        arrays : dict
            Maps each column to the ``adjusted_array`` built from its raw
            array, ``mask`` and its adjustments.

        Notes
        -----
        Any error raised by ``_shift_dates`` while moving the query window
        back by one calendar entry propagates to the caller.
        """
        # The caller passes the dates on which the algo will *see* data, so
        # what we return must be what was known at the start of each date.
        # The latest data known on day N is taken to be day (N - 1)'s data,
        # hence the baseline window is moved back by one calendar entry.
        shifted_start, shifted_end = _shift_dates(
            self._calendar, dates[0], dates[-1], shift=1,
        )

        baselines = self.raw_price_loader.load_raw_arrays(
            columns, shifted_start, shifted_end, assets,
        )
        # Adjustments are requested for the original, unshifted dates.
        per_column_adjustments = self.adjustments_loader.load_adjustments(
            columns, dates, assets,
        )

        adjusted = [
            adjusted_array(baseline, mask, column_adjustments)
            for baseline, column_adjustments in zip(
                baselines, per_column_adjustments
            )
        ]
        return dict(zip(columns, adjusted))


def _shift_dates(dates, start_date, end_date, shift):
    try:
        start = dates.get_loc(start_date)
    except KeyError:
        if start_date < dates[0]:
            raise NoFurtherDataError(
                msg=(
                    "Pipeline Query requested data starting on {query_start}, "
                    "but first known date is {calendar_start}"
                ).format(
                    query_start=str(start_date),
                    calendar_start=str(dates[0]),
                )
            )
        else:
            raise ValueError("Query start %s not in calendar" % start_date)

    # Make sure that shifting doesn't push us out of the calendar.
    if start < shift:
        raise NoFurtherDataError(
            msg=(
                "Pipeline Query requested data from {shift}"
                " days before {query_start}, but first known date is only "
                "{start} days earlier."
            ).format(shift=shift, query_start=start_date, start=start),
        )

    try:
        end = dates.get_loc(end_date)
    except KeyError:
        if end_date > dates[-1]:
            raise NoFurtherDataError(
                msg=(
                    "Pipeline Query requesting data up to {query_end}, "
                    "but last known date is {calendar_end}"
                ).format(
                    query_end=end_date,
                    calendar_end=dates[-1],
                )
            )
        else:
            raise ValueError("Query end %s not in calendar" % end_date)
    return dates[start - shift], dates[end - shift]


1			# Copyright 2015 Quantopian, Inc.
2			#
3			# Licensed under the Apache License, Version 2.0 (the "License");
4			# you may not use this file except in compliance with the License.
5			# You may obtain a copy of the License at
6			#
7			# http://www.apache.org/licenses/LICENSE-2.0
8			#
9			# Unless required by applicable law or agreed to in writing, software
10			# distributed under the License is distributed on an "AS IS" BASIS,
11			# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12			# See the License for the specific language governing permissions and
13			# limitations under the License.
14			from numpy import (
15			iinfo,
16			uint32,
17			)
18
19			from zipline.data.us_equity_pricing import (
20			BcolzDailyBarReader,
21			SQLiteAdjustmentReader,
22			)
23			from zipline.lib.adjusted_array import (
24			adjusted_array,
25			)
26			from zipline.errors import NoFurtherDataError
27
28			from .base import PipelineLoader
29
30			UINT32_MAX = iinfo(uint32).max
31
32
33			class USEquityPricingLoader(PipelineLoader):
34			"""
35			PipelineLoader for US Equity Pricing data
36
37			Delegates loading of baselines and adjustments.
38			"""
39
40			def __init__(self, raw_price_loader, adjustments_loader):
41			self.raw_price_loader = raw_price_loader
42			# HACK: Pull the calendar off our raw_price_loader so that we can
43			# backshift dates.
44			self._calendar = self.raw_price_loader._calendar
45			self.adjustments_loader = adjustments_loader
46
47			@classmethod
48			def from_files(cls, pricing_path, adjustments_path):
49			"""
50			Create a loader from a bcolz equity pricing dir and a SQLite
51			adjustments path.
52
53			Parameters
54			----------
55			pricing_path : str
56			Path to a bcolz directory written by a BcolzDailyBarWriter.
57			adjustments_path : str
58			Path to an adjustments db written by a SQLiteAdjustmentWriter.
59			"""
60			return cls(
61			BcolzDailyBarReader(pricing_path),
62			SQLiteAdjustmentReader(adjustments_path)
63			)
64
65			def load_adjusted_array(self, columns, dates, assets, mask):
66			# load_adjusted_array is called with dates on which the user's algo
67			# will be shown data, which means we need to return the data that would
68			# be known at the start of each date. We assume that the latest data
69			# known on day N is the data from day (N - 1), so we shift all query
70			# dates back by a day.
71			start_date, end_date = _shift_dates(
72			self._calendar, dates[0], dates[-1], shift=1,
73			)
74
75			raw_arrays = self.raw_price_loader.load_raw_arrays(
76			columns,
77			start_date,
78			end_date,
79			assets,
80			)
81			adjustments = self.adjustments_loader.load_adjustments(
82			columns,
83			dates,
84			assets,
85			)
86			adjusted_arrays = [
87			adjusted_array(raw_array, mask, col_adjustments)
88			for raw_array, col_adjustments in zip(raw_arrays, adjustments)
89			]
90
91			return dict(zip(columns, adjusted_arrays))
92
93
94			def _shift_dates(dates, start_date, end_date, shift):
95			try:
96			start = dates.get_loc(start_date)
97			except KeyError:
98			if start_date < dates[0]:
99			raise NoFurtherDataError(
100			msg=(
101			"Pipeline Query requested data starting on {query_start}, "
102			"but first known date is {calendar_start}"
103			).format(
104			query_start=str(start_date),
105			calendar_start=str(dates[0]),
106			)
107			)
108			else:
109			raise ValueError("Query start %s not in calendar" % start_date)
110
111			# Make sure that shifting doesn't push us out of the calendar.
112			if start < shift:
113			raise NoFurtherDataError(
114			msg=(
115			"Pipeline Query requested data from {shift}"
116			" days before {query_start}, but first known date is only "
117			"{start} days earlier."
118			).format(shift=shift, query_start=start_date, start=start),
119			)
120
121			try:
122			end = dates.get_loc(end_date)
123			except KeyError:
124			if end_date > dates[-1]:
125			raise NoFurtherDataError(
126			msg=(
127			"Pipeline Query requesting data up to {query_end}, "
128			"but last known date is {calendar_end}"
129			).format(
130			query_end=end_date,
131			calendar_end=dates[-1],
132			)
133			)
134			else:
135			raise ValueError("Query end %s not in calendar" % end_date)
136			return dates[start - shift], dates[end - shift]
137

quantopian / zipline

Pull Request — master (#836)

from_files() A

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like