Completed
Pull Request — master (#836)
by
unknown
01:28
created

from_files()   A

Complexity

Conditions 1

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 16
rs 9.4286
1
# Copyright 2015 Quantopian, Inc.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
from numpy import (
15
    iinfo,
16
    uint32,
17
)
18
19
from zipline.data.us_equity_pricing import (
20
    BcolzDailyBarReader,
21
    SQLiteAdjustmentReader,
22
)
23
from zipline.lib.adjusted_array import (
24
    adjusted_array,
25
)
26
from zipline.errors import NoFurtherDataError
27
28
from .base import PipelineLoader
29
30
# Largest value representable by an unsigned 32-bit integer.
UINT32_MAX = iinfo(uint32).max
31
32
33
class USEquityPricingLoader(PipelineLoader):
    """
    PipelineLoader that serves US equity pricing data.

    The loader does no reading of its own: baseline values come from
    ``raw_price_loader`` and adjustments come from ``adjustments_loader``.
    """

    def __init__(self, raw_price_loader, adjustments_loader):
        """
        Parameters
        ----------
        raw_price_loader : object
            Source of baseline data.  Must expose a ``_calendar`` attribute
            and a ``load_raw_arrays(columns, start, end, assets)`` method.
        adjustments_loader : object
            Source of adjustments.  Must expose a
            ``load_adjustments(columns, dates, assets)`` method.
        """
        self.raw_price_loader = raw_price_loader
        # HACK: borrow the raw price loader's calendar; it is what lets
        # load_adjusted_array move query dates back.
        self._calendar = raw_price_loader._calendar
        self.adjustments_loader = adjustments_loader

    @classmethod
    def from_files(cls, pricing_path, adjustments_path):
        """
        Build a loader from a bcolz equity pricing directory and a SQLite
        adjustments database.

        Parameters
        ----------
        pricing_path : str
            Path to a bcolz directory written by a BcolzDailyBarWriter.
        adjustments_path : str
            Path to an adjustments db written by a SQLiteAdjustmentWriter.
        """
        pricing_reader = BcolzDailyBarReader(pricing_path)
        adjustments_reader = SQLiteAdjustmentReader(adjustments_path)
        return cls(pricing_reader, adjustments_reader)

    def load_adjusted_array(self, columns, dates, assets, mask):
        """
        Load an adjusted array for each of ``columns``.

        Baseline data is read for the window running from ``dates[0]`` to
        ``dates[-1]`` moved one calendar entry earlier; adjustments are
        requested for ``dates`` as given.

        Returns
        -------
        dict
            Maps each entry of ``columns`` to the ``adjusted_array`` built
            from its raw array, ``mask`` and its adjustments.
        """
        # The dates we receive are the days on which the algo is shown
        # data, so each one must map to what was already known when that
        # day began.  The newest data known on day N is taken to be day
        # (N - 1)'s, hence the one-day backshift of the query window.
        shifted_start, shifted_end = _shift_dates(
            self._calendar,
            dates[0],
            dates[-1],
            shift=1,
        )

        baselines = self.raw_price_loader.load_raw_arrays(
            columns, shifted_start, shifted_end, assets,
        )
        per_column_adjustments = self.adjustments_loader.load_adjustments(
            columns, dates, assets,
        )

        arrays = [
            adjusted_array(baseline, mask, column_adjustments)
            for baseline, column_adjustments
            in zip(baselines, per_column_adjustments)
        ]
        return dict(zip(columns, arrays))
92
93
94
def _shift_dates(dates, start_date, end_date, shift):
95
    try:
96
        start = dates.get_loc(start_date)
97
    except KeyError:
98
        if start_date < dates[0]:
99
            raise NoFurtherDataError(
100
                msg=(
101
                    "Pipeline Query requested data starting on {query_start}, "
102
                    "but first known date is {calendar_start}"
103
                ).format(
104
                    query_start=str(start_date),
105
                    calendar_start=str(dates[0]),
106
                )
107
            )
108
        else:
109
            raise ValueError("Query start %s not in calendar" % start_date)
110
111
    # Make sure that shifting doesn't push us out of the calendar.
112
    if start < shift:
113
        raise NoFurtherDataError(
114
            msg=(
115
                "Pipeline Query requested data from {shift}"
116
                " days before {query_start}, but first known date is only "
117
                "{start} days earlier."
118
            ).format(shift=shift, query_start=start_date, start=start),
119
        )
120
121
    try:
122
        end = dates.get_loc(end_date)
123
    except KeyError:
124
        if end_date > dates[-1]:
125
            raise NoFurtherDataError(
126
                msg=(
127
                    "Pipeline Query requesting data up to {query_end}, "
128
                    "but last known date is {calendar_end}"
129
                ).format(
130
                    query_end=end_date,
131
                    calendar_end=dates[-1],
132
                )
133
            )
134
        else:
135
            raise ValueError("Query end %s not in calendar" % end_date)
136
    return dates[start - shift], dates[end - shift]
137