Completed
Pull Request — master (#858)
by Eddie
02:03
created

zipline.sources.BenchmarkSource   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 148
Duplicated Lines 0 %
Metric Value
dl 0
loc 148
rs 10
wmc 11

4 Methods

Rating   Name   Duplication   Size   Complexity  
B _validate_benchmark() 0 28 4
B _initialize_precalculated_series() 0 92 4
A get_value() 0 2 1
A __init__() 0 20 2
1
from zipline.errors import (
2
    InvalidBenchmarkAsset,
3
    BenchmarkAssetNotAvailableTooEarly,
4
    BenchmarkAssetNotAvailableTooLate
5
)
6
7
8
class BenchmarkSource(object):
    """
    Supplies per-period benchmark returns for a simulation.

    The return series is computed once at construction time and stored in
    ``precalculated_series``; ``get_value`` is a lookup into that series.

    Parameters
    ----------
    benchmark_sid: sid of the asset to use as the benchmark, or None to use
        ``env.benchmark_returns`` instead.

    env: object providing ``asset_finder``, ``benchmark_returns``,
        ``previous_trading_day``, ``market_minutes_for_day`` and
        ``minutes_for_days_in_range``.

    trading_days: ordered, indexable collection of the simulation days; the
        first and last entries are used as the simulation bounds.

    data_portal: object providing ``get_stock_dividends`` and
        ``get_history_window``.

    emission_rate: "minute" selects the minute-frequency series; any other
        value selects the daily series.
    """

    def __init__(self, benchmark_sid, env, trading_days, data_portal,
                 emission_rate="daily"):
        self.benchmark_sid = benchmark_sid
        self.env = env
        self.trading_days = trading_days
        self.emission_rate = emission_rate
        self.data_portal = data_portal

        # Compare against None rather than relying on truthiness: a sid of 0
        # is falsy, but _initialize_precalculated_series treats every sid
        # other than None as a real asset and needs benchmark_asset to exist.
        if self.benchmark_sid is not None:
            self.benchmark_asset = self.env.asset_finder.retrieve_asset(
                self.benchmark_sid)

            self._validate_benchmark()

        self.precalculated_series = self._initialize_precalculated_series(
            self.benchmark_sid,
            self.env,
            self.trading_days,
            self.data_portal
        )

    def get_value(self, dt):
        """Return the precalculated benchmark entry stored under ``dt``."""
        return self.precalculated_series.loc[dt]

    def _validate_benchmark(self):
        """
        Check that the benchmark asset can be used for the whole simulation.

        Raises
        ------
        InvalidBenchmarkAsset
            If the data portal reports any stock dividend for the asset over
            the simulation days.
        BenchmarkAssetNotAvailableTooEarly
            If the asset's start_date is after the first simulation day.
        BenchmarkAssetNotAvailableTooLate
            If the asset's end_date is before the last simulation day.
        """
        # check if this security has a stock dividend.  if so, raise an
        # error suggesting that the user pick a different asset to use
        # as benchmark.
        stock_dividends = self.data_portal.get_stock_dividends(
            self.benchmark_sid, self.trading_days)

        if len(stock_dividends) > 0:
            raise InvalidBenchmarkAsset(
                sid=str(self.benchmark_sid),
                dt=stock_dividends[0]["ex_date"]
            )

        first_day = self.trading_days[0]
        last_day = self.trading_days[-1]

        if self.benchmark_asset.start_date > first_day:
            # the asset started trading after the first simulation day
            raise BenchmarkAssetNotAvailableTooEarly(
                sid=str(self.benchmark_sid),
                dt=first_day,
                start_dt=self.benchmark_asset.start_date
            )

        if self.benchmark_asset.end_date < last_day:
            # the asset stopped trading before the last simulation day;
            # report the day the check was made against (the last one),
            # not the first simulation day.
            raise BenchmarkAssetNotAvailableTooLate(
                sid=str(self.benchmark_sid),
                dt=last_day,
                end_dt=self.benchmark_asset.end_date
            )

    def _initialize_precalculated_series(self, sid, env, trading_days,
                                         data_portal):
        """
        Internal method that precalculates the benchmark return series for
        use in the simulation.

        Parameters
        ----------
        sid: (int) Asset to use, or None to use env.benchmark_returns

        env: TradingEnvironment

        trading_days: pd.DateTimeIndex

        data_portal: DataPortal

        Notes
        -----
        If the benchmark asset started trading after the simulation start,
        or finished trading before the simulation end, exceptions are raised
        (by _validate_benchmark, before this method is called).

        If the benchmark asset started trading the same day as the simulation
        start, the first available minute price on that day is used instead
        of the previous close.

        We use history to get an adjusted price history for each day's close,
        as of the look-back date (the last day of the simulation).  Prices are
        fully adjusted for dividends, splits, and mergers.

        Returns
        -------
        A pd.Series, indexed by trading day, whose values represent the %
        change from close to close.
        """
        if sid is None:
            # get benchmark info from trading environment
            return env.benchmark_returns[trading_days[0]:trading_days[-1]]

        if self.emission_rate == "minute":
            minutes = env.minutes_for_days_in_range(trading_days[0],
                                                    trading_days[-1])

            # one extra bar so the first simulation minute has a predecessor
            # to compute its % change against.
            # NOTE(review): unlike the daily branch, the [sid] column is not
            # selected here, so the value returned is whatever
            # get_history_window yields for the asset list -- confirm whether
            # callers depend on that shape before changing it.
            benchmark_series = data_portal.get_history_window(
                [sid],
                minutes[-1],
                bar_count=len(minutes) + 1,
                frequency="1m",
                field="price",
                ffill=True
            )

            # drop the leading entry, which has no predecessor
            return benchmark_series.pct_change().iloc[1:]

        # get the window of close prices for benchmark_sid from the last
        # trading day of the simulation, going up to one day before the
        # simulation start day (so that we can get the % change on day 1)
        benchmark_series = data_portal.get_history_window(
            [sid],
            trading_days[-1],
            bar_count=len(trading_days) + 1,
            frequency="1d",
            field="price",
            ffill=True
        )[sid]

        # now, we need to check if we can safely go use the
        # one-day-before-sim-start value, by seeing if the asset was
        # trading that day.
        trading_day_before_sim_start = \
            env.previous_trading_day(trading_days[0])

        if self.benchmark_asset.start_date > trading_day_before_sim_start:
            # we can't go back one day before sim start, because the asset
            # didn't start trading until the same day as the sim start.
            # instead, we'll use the first available minute value of the
            # first sim day.
            minutes_in_first_day = \
                env.market_minutes_for_day(trading_days[0])

            # get a minute history window of the first day
            minute_window = data_portal.get_history_window(
                [sid],
                minutes_in_first_day[-1],
                bar_count=len(minutes_in_first_day),
                frequency="1m",
                field="price",
                ffill=True
            )[sid]

            # find the first non-zero value and use it in place of the
            # (unavailable) previous close.  .iloc makes the positional
            # access explicit on these date-indexed series.
            value_to_use = minute_window[minute_window != 0].iloc[0]
            benchmark_series.iloc[0] = value_to_use

        # drop the leading entry, which has no predecessor
        return benchmark_series.pct_change().iloc[1:]
156