|
1
|
|
|
# |
|
2
|
|
|
# Copyright 2015 Quantopian, Inc. |
|
3
|
|
|
# |
|
4
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
5
|
|
|
# you may not use this file except in compliance with the License. |
|
6
|
|
|
# You may obtain a copy of the License at |
|
7
|
|
|
# |
|
8
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
9
|
|
|
# |
|
10
|
|
|
# Unless required by applicable law or agreed to in writing, software |
|
11
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
12
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13
|
|
|
# See the License for the specific language governing permissions and |
|
14
|
|
|
# limitations under the License. |
|
15
|
|
|
from unittest import TestCase |
|
16
|
|
|
|
|
17
|
|
|
from nose_parameterized import parameterized |
|
18
|
|
|
from numpy import ( |
|
19
|
|
|
arange, |
|
20
|
|
|
datetime64, |
|
21
|
|
|
) |
|
22
|
|
|
from numpy.testing import ( |
|
23
|
|
|
assert_array_equal, |
|
24
|
|
|
) |
|
25
|
|
|
from pandas import ( |
|
26
|
|
|
DataFrame, |
|
27
|
|
|
DatetimeIndex, |
|
28
|
|
|
Timestamp, |
|
29
|
|
|
) |
|
30
|
|
|
from pandas.util.testing import assert_index_equal |
|
31
|
|
|
from testfixtures import TempDirectory |
|
32
|
|
|
|
|
33
|
|
|
from zipline.pipeline.loaders.synthetic import ( |
|
34
|
|
|
SyntheticDailyBarWriter, |
|
35
|
|
|
) |
|
36
|
|
|
from zipline.data.us_equity_pricing import ( |
|
37
|
|
|
BcolzDailyBarReader, |
|
38
|
|
|
NoDataOnDate |
|
39
|
|
|
) |
|
40
|
|
|
from zipline.finance.trading import TradingEnvironment |
|
41
|
|
|
from zipline.pipeline.data import USEquityPricing |
|
42
|
|
|
from zipline.utils.test_utils import ( |
|
43
|
|
|
seconds_to_timestamp, |
|
44
|
|
|
) |
|
45
|
|
|
|
|
46
|
|
|
# Bounds (inclusive) of the trading calendar used by the tests.
TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC')
TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC')

# Bounds of the query window used when reading data back.
TEST_QUERY_START = Timestamp('2015-06-10', tz='UTC')
TEST_QUERY_STOP = Timestamp('2015-06-19', tz='UTC')

# One asset for each of the cases enumerated in load_raw_arrays_from_bcolz.
# Rows are indexed by asset id (1 through 6).
EQUITY_INFO = DataFrame(
    [
        # 1) The equity's trades start and end before query.
        {'start_date': '2015-06-01', 'end_date': '2015-06-05'},
        # 2) The equity's trades start and end after query.
        {'start_date': '2015-06-22', 'end_date': '2015-06-30'},
        # 3) The equity's data covers all dates in range.
        {'start_date': '2015-06-02', 'end_date': '2015-06-30'},
        # 4) The equity's trades start before the query start, but stop
        #    before the query end.
        {'start_date': '2015-06-01', 'end_date': '2015-06-15'},
        # 5) The equity's trades start and end during the query.
        {'start_date': '2015-06-12', 'end_date': '2015-06-18'},
        # 6) The equity's trades start during the query, but extend through
        #    the whole query.
        {'start_date': '2015-06-15', 'end_date': '2015-06-25'},
    ],
    index=arange(1, 7),
    columns=['start_date', 'end_date'],
    # Spell out the unit: a unit-less ``datetime64`` dtype is rejected by
    # newer pandas releases, while 'datetime64[ns]' is accepted everywhere.
).astype('datetime64[ns]')

TEST_QUERY_ASSETS = EQUITY_INFO.index
|
75
|
|
|
|
|
76
|
|
|
|
|
77
|
|
|
class BcolzDailyBarTestCase(TestCase):
    """
    Round-trip tests for daily bar data.

    Each test writes synthetic bars with ``SyntheticDailyBarWriter`` into a
    fresh temporary directory, then either inspects the written table
    directly or reads it back through ``BcolzDailyBarReader``.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the full trading calendar to the test window.  The stop
        # location is bumped by one so TEST_CALENDAR_STOP itself is included.
        all_trading_days = TradingEnvironment().trading_days
        cls.trading_days = all_trading_days[
            all_trading_days.get_loc(TEST_CALENDAR_START):
            all_trading_days.get_loc(TEST_CALENDAR_STOP) + 1
        ]

    def setUp(self):
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info,
            self.trading_days,
        )

        # Every test writes into its own temporary directory, which is
        # removed again in tearDown.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        self.dir_.cleanup()

    @property
    def assets(self):
        """Index of asset ids under test (the index of ``asset_info``)."""
        return self.asset_info.index

    def trading_days_between(self, start, end):
        """Return the slice of ``self.trading_days`` from start to end."""
        return self.trading_days[self.trading_days.slice_indexer(start, end)]

    def asset_start(self, asset_id):
        """Return the writer's start date for ``asset_id``."""
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        """Return the writer's end date for ``asset_id``."""
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        """Return the trading days between the asset's start and end."""
        start, end = self.asset_start(asset_id), self.asset_end(asset_id)
        return self.trading_days_between(start, end)

    def _write_table(self):
        """Write all test assets over the test calendar to ``self.dest``."""
        return self.writer.write(self.dest, self.trading_days, self.assets)

    def _make_reader(self):
        """Write the synthetic table and wrap it in a BcolzDailyBarReader."""
        return BcolzDailyBarReader(self._write_table())

    def test_write_ohlcv_content(self):
        result = self._write_table()
        for column in SyntheticDailyBarWriter.OHLCV:
            idx = 0
            data = result[column][:]
            # Every column except volume is compared against the expected
            # value scaled by 1000.
            # NOTE(review): presumably prices are stored as scaled
            # integers -- confirm against the writer.
            multiplier = 1 if column == 'volume' else 1000
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    self.assertEqual(
                        SyntheticDailyBarWriter.expected_value(
                            asset_id,
                            date,
                            column
                        ) * multiplier,
                        data[idx],
                    )
                    idx += 1
            # Every row of the column must have been visited.
            self.assertEqual(idx, len(data))

    def test_write_day_and_id(self):
        result = self._write_table()
        idx = 0
        # Materialize both columns once instead of indexing the column
        # containers element by element inside the loop.
        ids = result['id'][:]
        days = result['day'][:]
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[idx], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[idx]))
                idx += 1
        # As in test_write_ohlcv_content, make sure no unexpected extra
        # rows were written after the ones checked above.
        self.assertEqual(idx, len(ids))
        self.assertEqual(idx, len(days))

    def test_write_attrs(self):
        result = self._write_table()
        expected_first_row = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        expected_last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,  # Asset 6 has 9 trading days.
        }
        expected_calendar_offset = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }
        self.assertEqual(result.attrs['first_row'], expected_first_row)
        self.assertEqual(result.attrs['last_row'], expected_last_row)
        self.assertEqual(
            result.attrs['calendar_offset'],
            expected_calendar_offset,
        )
        assert_index_equal(
            self.trading_days,
            DatetimeIndex(result.attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        """
        Write the synthetic table, load ``columns`` for ``assets`` between
        ``start_date`` and ``end_date``, and compare each loaded array with
        the writer's ``expected_values_2d`` for the same dates and assets.
        """
        reader = self._make_reader()
        results = reader.load_raw_arrays(columns, start_date, end_date, assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, result in zip(columns, results):
            assert_array_equal(
                result,
                self.writer.expected_values_2d(
                    dates,
                    assets,
                    column.name,
                )
            )

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume, USEquityPricing.high, USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that start on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_start(asset),
                end_date=self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_end(asset),
                end_date=self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_end(asset),
            )

    def test_unadjusted_spot_price(self):
        reader = self._make_reader()
        # At beginning
        price = reader.spot_price(1, Timestamp('2015-06-01', tz='UTC'),
                                  'close')
        # Synthetic writes price for date.
        self.assertEqual(135630.0, price)

        # Middle
        price = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                  'close')
        self.assertEqual(135631.0, price)
        # End
        price = reader.spot_price(1, Timestamp('2015-06-05', tz='UTC'),
                                  'close')
        self.assertEqual(135634.0, price)

        # Another sid at beginning.
        price = reader.spot_price(2, Timestamp('2015-06-22', tz='UTC'),
                                  'close')
        self.assertEqual(235651.0, price)

        # Ensure that volume does not have float adjustment applied.
        volume = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                   'volume')
        self.assertEqual(145631, volume)

    def test_unadjusted_spot_price_no_data(self):
        reader = self._make_reader()
        # before: asset 2 does not start trading until 2015-06-22.
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(2, Timestamp('2015-06-08', tz='UTC'), 'close')

        # after: asset 4 stops trading on 2015-06-15.
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(4, Timestamp('2015-06-16', tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        reader = self._make_reader()

        # A sid, day and corresponding index into which to overwrite a zero.
        zero_sid = 1
        zero_day = Timestamp('2015-06-02', tz='UTC')
        zero_ix = reader.sid_day_index(zero_sid, zero_day)

        # Write a zero into the synthetic pricing data at the day and sid,
        # so that a read should now return -1.
        # This is a little hacky, in lieu of changing the synthetic data set.
        reader._spot_col('close')[zero_ix] = 0

        close = reader.spot_price(zero_sid, zero_day, 'close')
        self.assertEqual(-1, close)
|
326
|
|
|
|