1
|
|
|
# |
2
|
|
|
# Copyright 2015 Quantopian, Inc. |
3
|
|
|
# |
4
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
5
|
|
|
# you may not use this file except in compliance with the License. |
6
|
|
|
# You may obtain a copy of the License at |
7
|
|
|
# |
8
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
9
|
|
|
# |
10
|
|
|
# Unless required by applicable law or agreed to in writing, software |
11
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
12
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13
|
|
|
# See the License for the specific language governing permissions and |
14
|
|
|
# limitations under the License. |
15
|
|
|
from unittest import TestCase |
16
|
|
|
|
17
|
|
|
from nose_parameterized import parameterized |
18
|
|
|
from numpy import ( |
19
|
|
|
arange, |
20
|
|
|
datetime64, |
21
|
|
|
) |
22
|
|
|
from numpy.testing import ( |
23
|
|
|
assert_array_equal, |
24
|
|
|
) |
25
|
|
|
from pandas import ( |
26
|
|
|
DataFrame, |
27
|
|
|
DatetimeIndex, |
28
|
|
|
Timestamp, |
29
|
|
|
) |
30
|
|
|
from pandas.util.testing import assert_index_equal |
31
|
|
|
from testfixtures import TempDirectory |
32
|
|
|
|
33
|
|
|
from zipline.pipeline.loaders.synthetic import ( |
34
|
|
|
SyntheticDailyBarWriter, |
35
|
|
|
) |
36
|
|
|
from zipline.data.us_equity_pricing import ( |
37
|
|
|
BcolzDailyBarReader, |
38
|
|
|
NoDataOnDate |
39
|
|
|
) |
40
|
|
|
from zipline.finance.trading import TradingEnvironment |
41
|
|
|
from zipline.pipeline.data import USEquityPricing |
42
|
|
|
from zipline.utils.test_utils import ( |
43
|
|
|
seconds_to_timestamp, |
44
|
|
|
) |
45
|
|
|
|
46
|
|
|
# Inclusive bounds of the trading calendar the test data is written against.
TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC')
TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC')

# Query window that lies strictly inside the calendar bounds above.
TEST_QUERY_START = Timestamp('2015-06-10', tz='UTC')
TEST_QUERY_STOP = Timestamp('2015-06-19', tz='UTC')

# One asset for each of the cases enumerated in load_raw_arrays_from_bcolz.
# Rows are indexed by asset id (1 through 6).
EQUITY_INFO = DataFrame(
    [
        # 1) The equity's trades start and end before query.
        {'start_date': '2015-06-01', 'end_date': '2015-06-05'},
        # 2) The equity's trades start and end after query.
        {'start_date': '2015-06-22', 'end_date': '2015-06-30'},
        # 3) The equity's data covers all dates in range.
        {'start_date': '2015-06-02', 'end_date': '2015-06-30'},
        # 4) The equity's trades start before the query start, but stop
        #    before the query end.
        {'start_date': '2015-06-01', 'end_date': '2015-06-15'},
        # 5) The equity's trades start and end during the query.
        {'start_date': '2015-06-12', 'end_date': '2015-06-18'},
        # 6) The equity's trades start during the query, but extend through
        #    the whole query.
        {'start_date': '2015-06-15', 'end_date': '2015-06-25'},
    ],
    index=arange(1, 7),
    columns=['start_date', 'end_date'],
    # Spell out the unit explicitly: recent pandas releases refuse to cast to
    # the unit-less ``datetime64`` dtype, while ``datetime64[ns]`` is accepted
    # by old and new releases alike.
).astype('datetime64[ns]')

TEST_QUERY_ASSETS = EQUITY_INFO.index
75
|
|
|
|
76
|
|
|
|
77
|
|
|
class BcolzDailyBarTestCase(TestCase):
    """
    Round-trip tests for daily bar data: rows are written with
    SyntheticDailyBarWriter and then inspected directly or read back through
    BcolzDailyBarReader.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the environment's trading calendar to the inclusive
        # [TEST_CALENDAR_START, TEST_CALENDAR_STOP] window.
        all_trading_days = TradingEnvironment().trading_days
        cls.trading_days = all_trading_days[
            all_trading_days.get_loc(TEST_CALENDAR_START):
            all_trading_days.get_loc(TEST_CALENDAR_STOP) + 1
        ]

    def setUp(self):
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info,
            self.trading_days,
        )

        # Each test writes its table into a fresh temporary directory, which
        # tearDown removes again.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        self.dir_.cleanup()

    @property
    def assets(self):
        """Index of the asset ids described by ``self.asset_info``."""
        return self.asset_info.index

    def trading_days_between(self, start, end):
        """Return the slice of ``self.trading_days`` from start to end."""
        return self.trading_days[self.trading_days.slice_indexer(start, end)]

    def asset_start(self, asset_id):
        """Return the writer's start date for ``asset_id``."""
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        """Return the writer's end date for ``asset_id``."""
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        """Return the trading days between the asset's start and end."""
        start, end = self.asset_start(asset_id), self.asset_end(asset_id)
        return self.trading_days_between(start, end)

    def test_write_ohlcv_content(self):
        """
        Walk every OHLCV column row by row, asset by asset and day by day,
        and compare each stored value with the writer's expected value.
        """
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        for column in SyntheticDailyBarWriter.OHLCV:
            idx = 0
            data = result[column][:]
            # Every column except volume is expected to hold the synthetic
            # value multiplied by 1000.
            multiplier = 1 if column == 'volume' else 1000
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    self.assertEqual(
                        SyntheticDailyBarWriter.expected_value(
                            asset_id,
                            date,
                            column
                        ) * multiplier,
                        data[idx],
                    )
                    idx += 1
            # Every row of the column must have been visited.
            self.assertEqual(idx, len(data))

    def test_write_day_and_id(self):
        """
        Check the ``id`` and ``day`` columns row by row against the assets
        and their trading days, in the order they are iterated here.
        """
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        idx = 0
        # Materialise both columns once, as test_write_ohlcv_content does,
        # rather than indexing the stored columns element by element.
        ids = result['id'][:]
        days = result['day'][:]
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[idx], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[idx]))
                idx += 1
        # Every row must have been visited; without this check, unexpected
        # trailing rows would go unnoticed.
        self.assertEqual(idx, len(ids))

    def test_write_attrs(self):
        """
        Check the ``first_row``, ``last_row``, ``calendar_offset`` and
        ``calendar`` attrs of the written table.
        """
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        expected_first_row = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        expected_last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,    # Asset 6 has 9 trading days.
        }
        expected_calendar_offset = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }
        self.assertEqual(result.attrs['first_row'], expected_first_row)
        self.assertEqual(result.attrs['last_row'], expected_last_row)
        self.assertEqual(
            result.attrs['calendar_offset'],
            expected_calendar_offset,
        )
        assert_index_equal(
            self.trading_days,
            DatetimeIndex(result.attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        """
        Write the synthetic table, load ``columns`` for ``assets`` between
        ``start_date`` and ``end_date`` through a BcolzDailyBarReader, and
        compare each returned array with the writer's expected 2D values.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        results = reader.load_raw_arrays(columns, start_date, end_date, assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, result in zip(columns, results):
            assert_array_equal(
                result,
                self.writer.expected_values_2d(
                    dates,
                    assets,
                    column.name,
                )
            )

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume, USEquityPricing.high, USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        """Read several column combinations over the default query window."""
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that start on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_start(asset),
                end_date=self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_end(asset),
                end_date=self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_end(asset),
            )

    def test_unadjusted_spot_price(self):
        """
        Check ``spot_price`` at the start, middle and end of one asset's
        range, at the start of a second asset's range, and for volume.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # At beginning
        price = reader.spot_price(1, Timestamp('2015-06-01', tz='UTC'),
                                  'close')
        # Synthetic writes price for date.
        self.assertEqual(135630.0, price)

        # Middle
        price = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                  'close')
        self.assertEqual(135631.0, price)
        # End
        price = reader.spot_price(1, Timestamp('2015-06-05', tz='UTC'),
                                  'close')
        self.assertEqual(135634.0, price)

        # Another sid at beginning.
        price = reader.spot_price(2, Timestamp('2015-06-22', tz='UTC'),
                                  'close')
        self.assertEqual(235651.0, price)

        # Ensure that volume does not have float adjustment applied.
        volume = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                   'volume')
        self.assertEqual(145631, volume)

    def test_unadjusted_spot_price_no_data(self):
        """
        ``spot_price`` must raise NoDataOnDate for a day before an asset's
        first trade and for a day after an asset's last trade.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # before
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(2, Timestamp('2015-06-08', tz='UTC'), 'close')

        # after
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(4, Timestamp('2015-06-16', tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        """
        A close that is stored as zero must be reported by ``spot_price``
        as -1.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)

        # The (sid, day) row whose close gets overwritten with a zero.
        zero_sid = 1
        zero_day = Timestamp('2015-06-02', tz='UTC')
        zero_ix = reader.sid_day_index(zero_sid, zero_day)

        # Write a zero into the synthetic pricing data at the day and sid,
        # so that a read should now return -1.
        # This is a little hacky, in lieu of changing the synthetic data set.
        reader._spot_col('close')[zero_ix] = 0

        close = reader.spot_price(zero_sid, zero_day, 'close')
        self.assertEqual(-1, close)
326
|
|
|
|