Completed
Pull Request — master (#858)
by Eddie
01:54
created

zipline.sources.BenchmarkSource.__init__()   A

Complexity

Conditions 2

Size

Total Lines 20

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 2
dl 0
loc 20
rs 9.4286
1
from zipline.errors import (
2
    InvalidBenchmarkAsset,
3
    BenchmarkAssetNotAvailableTooEarly,
4
    BenchmarkAssetNotAvailableTooLate
5
)
6
from zipline.data.us_equity_pricing import NoDataOnDate
7
8
9
class BenchmarkSource(object):
    """
    Supplies benchmark returns for a simulation.

    The full return series is computed once, at construction time, and
    stored in ``precalculated_series``; ``get_value`` is then a plain
    label lookup into that series.

    Parameters
    ----------
    benchmark_sid: (int or None) Asset to use as the benchmark.  When None,
        the returns come from ``env.benchmark_returns`` instead.

    env: TradingEnvironment

    trading_days: pd.DateTimeIndex

    data_portal: DataPortal

    emission_rate: (str) "daily" (the default) or "minute".
    """
    def __init__(self, benchmark_sid, env, trading_days, data_portal,
                 emission_rate="daily"):
        self.benchmark_sid = benchmark_sid
        self.env = env
        self.trading_days = trading_days
        self.emission_rate = emission_rate
        self.data_portal = data_portal

        # Always defined, so readers of the attribute never hit an
        # AttributeError when no benchmark sid was supplied.
        self.benchmark_asset = None

        # Compare against None rather than relying on truthiness: sid 0 is
        # falsy, yet _initialize_precalculated_series (which tests
        # ``sid is None``) treats it as a real asset, so it has to be
        # looked up and validated like any other sid.
        if self.benchmark_sid is not None:
            self.benchmark_asset = self.env.asset_finder.retrieve_asset(
                self.benchmark_sid)

            self._validate_benchmark()

        self.precalculated_series = \
            self._initialize_precalculated_series(
                self.benchmark_sid,
                self.env,
                self.trading_days,
                self.data_portal
            )

    def get_value(self, dt):
        """
        Look up the precalculated benchmark return labelled ``dt``.

        Parameters
        ----------
        dt: label present in the index of ``precalculated_series``.

        Returns
        -------
        The entry of ``precalculated_series`` at ``dt``.
        """
        return self.precalculated_series.loc[dt]

    def _validate_benchmark(self):
        """
        Check that the benchmark asset is usable for the whole simulation.

        Raises
        ------
        InvalidBenchmarkAsset
            If the data portal reports any stock dividend for the asset
            over ``trading_days``.
        BenchmarkAssetNotAvailableTooEarly
            If the asset's start date is after the first trading day.
        BenchmarkAssetNotAvailableTooLate
            If the asset's end date is before the last trading day.
        """
        # check if this security has a stock dividend.  if so, raise an
        # error suggesting that the user pick a different asset to use
        # as benchmark.
        stock_dividends = \
            self.data_portal.get_stock_dividends(self.benchmark_sid,
                                                 self.trading_days)

        if len(stock_dividends) > 0:
            raise InvalidBenchmarkAsset(
                sid=str(self.benchmark_sid),
                dt=stock_dividends[0]["ex_date"]
            )

        first_day = self.trading_days[0]
        last_day = self.trading_days[-1]

        if self.benchmark_asset.start_date > first_day:
            # the asset started trading after the first simulation day
            raise BenchmarkAssetNotAvailableTooEarly(
                sid=str(self.benchmark_sid),
                dt=first_day,
                start_dt=self.benchmark_asset.start_date
            )

        if self.benchmark_asset.end_date < last_day:
            # the asset stopped trading before the last simulation day.
            # Report the last simulation day: that is the date the
            # comparison above failed on, not the first day.
            raise BenchmarkAssetNotAvailableTooLate(
                sid=str(self.benchmark_sid),
                dt=last_day,
                end_dt=self.benchmark_asset.end_date
            )

    def _initialize_precalculated_series(self, sid, env, trading_days,
                                         data_portal):
        """
        Internal method that precalculates the benchmark return series for
        use in the simulation.

        Parameters
        ----------
        sid: (int) Asset to use

        env: TradingEnvironment

        trading_days: pd.DateTimeIndex

        data_portal: DataPortal

        Notes
        -----
        If the benchmark asset started trading after the simulation start,
        or finished trading before the simulation end, exceptions are raised.

        If the benchmark asset started trading the same day as the simulation
        start, the first available minute price on that day is used instead
        of the previous close.

        We use history to get an adjusted price history for each day's close,
        as of the look-back date (the last day of the simulation).  Prices are
        fully adjusted for dividends, splits, and mergers.

        Returns
        -------
        A pd.Series, indexed by trading day (or by minute when the emission
        rate is "minute"), whose values represent the % change from one bar
        to the next.
        """
        first_day = trading_days[0]
        last_day = trading_days[-1]
        minute_mode = self.emission_rate == "minute"

        if sid is None:
            # get benchmark info from trading environment, which defaults to
            # downloading data from Yahoo.
            daily_series = env.benchmark_returns[first_day:last_day]

            if not minute_mode:
                return daily_series

            # we need to take the env's benchmark returns, which are daily,
            # and resample them to minute
            minutes = env.minutes_for_days_in_range(
                start=first_day,
                end=last_day
            )
            return daily_series.reindex(
                index=minutes,
                method="ffill"
            )

        if minute_mode:
            # Use the trading_days argument (not self.trading_days) so this
            # branch agrees with every other branch of the method.
            minutes = env.minutes_for_days_in_range(first_day, last_day)

            # One extra bar before the first minute so that the first
            # minute has a % change.  Select the sid column so that a
            # Series is returned, matching the daily branch and the
            # documented return type; without it get_value would yield a
            # row rather than a scalar.
            benchmark_series = data_portal.get_history_window(
                [sid],
                minutes[-1],
                bar_count=len(minutes) + 1,
                frequency="1m",
                field="price",
                ffill=True
            )[sid]

            return benchmark_series.pct_change().iloc[1:]

        try:
            # get the window of close prices for benchmark_sid from the
            # last trading day of the simulation, going up to one day
            # before the simulation start day (so that we can get the %
            # change on day 1)
            benchmark_series = data_portal.get_history_window(
                [sid],
                last_day,
                bar_count=len(trading_days) + 1,
                frequency="1d",
                field="price",
                ffill=True
            )[sid]
        except NoDataOnDate:
            # Attempt to handle case where stock data starts on first
            # day, in this case use the open to close return.
            benchmark_series = data_portal.get_history_window(
                [sid],
                last_day,
                bar_count=len(trading_days),
                frequency="1d",
                field="price",
                ffill=True
            )[sid]

            # open and close of the first simulation day
            first_open = data_portal.get_spot_value(
                sid, 'open', first_day)
            first_close = data_portal.get_spot_value(
                sid, 'close', first_day)

            first_day_return = (first_close - first_open) / first_open

            # The first % change has no previous bar and is NaN; overwrite
            # it, by position, with the open-to-close return.
            returns = benchmark_series.pct_change()
            returns.iloc[0] = first_day_return
            return returns
        else:
            # drop the leading NaN produced by the extra look-back bar
            return benchmark_series.pct_change().iloc[1:]
172