1
|
|
|
from os.path import dirname, join, realpath |
2
|
|
|
from textwrap import dedent |
3
|
|
|
from unittest import TestCase |
4
|
|
|
import bcolz |
5
|
|
|
from datetime import timedelta |
6
|
|
|
from nose_parameterized import parameterized |
7
|
|
|
from pandas.tslib import normalize_date |
8
|
|
|
from testfixtures import TempDirectory |
9
|
|
|
import numpy as np |
10
|
|
|
from numpy import array |
11
|
|
|
import pandas as pd |
12
|
|
|
from pandas import ( |
13
|
|
|
read_csv, |
14
|
|
|
Timestamp, |
15
|
|
|
DataFrame, DatetimeIndex) |
16
|
|
|
|
17
|
|
|
from six import iteritems |
18
|
|
|
from zipline import TradingAlgorithm |
19
|
|
|
|
20
|
|
|
from zipline.data.data_portal import DataPortal |
21
|
|
|
from zipline.data.us_equity_pricing import ( |
22
|
|
|
DailyBarWriterFromCSVs, |
23
|
|
|
SQLiteAdjustmentWriter, |
24
|
|
|
SQLiteAdjustmentReader, |
25
|
|
|
) |
26
|
|
|
from zipline.errors import HistoryInInitialize |
27
|
|
|
from zipline.utils.test_utils import ( |
28
|
|
|
make_simple_asset_info, |
29
|
|
|
str_to_seconds, |
30
|
|
|
MockDailyBarReader |
31
|
|
|
) |
32
|
|
|
from zipline.data.minute_writer import MinuteBarWriterFromCSVs |
33
|
|
|
from zipline.utils.tradingcalendar import trading_days |
34
|
|
|
from zipline.finance.trading import ( |
35
|
|
|
TradingEnvironment, |
36
|
|
|
SimulationParameters |
37
|
|
|
) |
38
|
|
|
|
39
|
|
|
# Absolute paths to the CSV fixture directories used by this test module.
# Both are resolved relative to this file's location (zipline_repo/tests).
TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)
52
|
|
|
|
53
|
|
|
|
54
|
|
|
class HistoryTestCase(TestCase): |
55
|
|
|
    @classmethod
    def setUpClass(cls):
        """Build the shared test fixture: a TradingEnvironment with nine
        equities and two futures, plus on-disk minute/daily bar data and an
        adjustments (splits/mergers/dividends) database in a temp directory.
        """
        # Fixed sids used throughout the test class.
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        # Equity sids only; the futures are registered separately below.
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    # NOTE(review): start_date (2015-11-23) is AFTER
                    # end_date (2014-12-01) — looks inverted; confirm the
                    # intended fixture dates.
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        # Everything below writes files into the tempdir; clean up on any
        # failure so a broken setUpClass doesn't leak temp directories.
        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            cls.create_fake_futures_minute_data(
                cls.tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                cls.tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            # Three 2:1 splits for AAPL and three for IBM (effective dates
            # chosen to exercise the minute/daily adjustment code paths).
            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            # A single merger adjustment for sid C.
            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
                ],
                columns=['effective_date', 'ratio', 'sid'])

            # Two cash dividends for DIVIDEND_SID in March 2014.
            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            cls.tempdir.cleanup()
            raise
194
|
|
|
|
195
|
|
|
    @classmethod
    def tearDownClass(cls):
        """Remove the temp directory holding all fixture files."""
        cls.tempdir.cleanup()
198
|
|
|
|
199
|
|
|
    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        """Write a synthetic minute-bar bcolz ctable for ``asset``.

        Each OHLCV column is a deterministic integer ramp (offset by a
        per-field base so fields are distinguishable), left-padded with
        zeros for the minutes between midnight and ``start_dt``.
        """
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        # Prices are scaled by 1000; presumably the reader divides back out
        # (fixed-point storage) — TODO confirm against the reader.
        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        path = join(tempdir.path, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        # Stamp the data window bounds as epoch seconds (Timestamp.value
        # is nanoseconds) so readers can locate this asset's minutes.
        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9
227
|
|
|
|
228
|
|
|
@classmethod |
229
|
|
|
def create_fake_minute_data(cls, tempdir): |
230
|
|
|
resources = { |
231
|
|
|
cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'), |
232
|
|
|
cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'), |
233
|
|
|
cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'), |
234
|
|
|
cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"), |
235
|
|
|
cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"), |
236
|
|
|
cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"), |
237
|
|
|
cls.GS: |
238
|
|
|
join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"), # unused |
239
|
|
|
cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"), |
240
|
|
|
cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH, |
241
|
|
|
"DIVIDEND_minute.csv.gz"), |
242
|
|
|
} |
243
|
|
|
|
244
|
|
|
MinuteBarWriterFromCSVs(resources).write(tempdir.path, cls.assets) |
245
|
|
|
|
246
|
|
|
    @classmethod
    def create_fake_daily_data(cls, tempdir):
        """Write daily bars for all equity test sids into a bcolz file in
        ``tempdir`` using the CSV fixtures.
        """
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        # NOTE(review): raw_data (and the 'price' column added here) is never
        # handed to the writer below — the writer is constructed from the CSV
        # paths directly. This block looks vestigial; confirm before removing.
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)
270
|
|
|
|
271
|
|
|
    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        """Write a SQLite adjustments DB named ``filename`` into ``tempdir``.

        Any of ``splits`` / ``mergers`` / ``dividends`` left as None is
        replaced with an empty frame carrying the correct column dtypes.
        """
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
            )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)
319
|
|
|
|
320
|
|
|
def get_portal(self, |
321
|
|
|
daily_equities_filename="test_daily_data.bcolz", |
322
|
|
|
adjustments_filename="adjustments.sqlite", |
323
|
|
|
env=None): |
324
|
|
|
|
325
|
|
|
if env is None: |
326
|
|
|
env = self.env |
327
|
|
|
|
328
|
|
|
temp_path = self.tempdir.path |
329
|
|
|
|
330
|
|
|
adjustment_reader = SQLiteAdjustmentReader( |
331
|
|
|
join(temp_path, adjustments_filename)) |
332
|
|
|
|
333
|
|
|
return DataPortal( |
334
|
|
|
env, |
335
|
|
|
minutes_equities_path=temp_path, |
336
|
|
|
daily_equities_path=join(temp_path, daily_equities_filename), |
337
|
|
|
adjustment_reader=adjustment_reader |
338
|
|
|
) |
339
|
|
|
|
340
|
|
|
    def test_history_in_initialize(self):
        """Calling history() inside initialize() must raise
        HistoryInInitialize.
        """
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        # initialize() runs the user's initialize hook, which calls
        # history() — that is the illegal call under test.
        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()
374
|
|
|
|
375
|
|
|
def test_minute_basic_functionality(self): |
376
|
|
|
# get a 5-bar minute history from the very end of the available data |
377
|
|
|
window = self.get_portal().get_history_window( |
378
|
|
|
[1], |
379
|
|
|
pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'), |
380
|
|
|
5, |
381
|
|
|
"1m", |
382
|
|
|
"open_price" |
383
|
|
|
) |
384
|
|
|
|
385
|
|
|
self.assertEqual(len(window), 5) |
386
|
|
|
reference = [534.469, 534.471, 534.475, 534.477, 534.477] |
387
|
|
|
for i in range(0, 4): |
388
|
|
|
self.assertEqual(window.iloc[-5 + i].loc[1], reference[i]) |
389
|
|
|
|
390
|
|
|
    def test_minute_splits(self):
        """Minute bars for AAPL before each split effective date should be
        adjusted; spot-check the bars straddling each split boundary.
        """
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5).

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEquals(window.loc[day1_end, 1], 533.086)
        self.assertEquals(window.loc[day2_start, 1], 533.087)
        self.assertEquals(window.loc[day2_end, 1], 533.853)
        self.assertEquals(window.loc[day3_start, 1], 533.854)
415
|
|
|
|
416
|
|
|
    def test_minute_window_starts_before_trading_start(self):
        """Minute windows reaching before an asset's first trading minute
        should be NaN-padded at the front.
        """
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        # NOTE(review): range(0, 4) never checks reference[4] — looks like
        # an off-by-one; confirm before tightening.
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 4):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])
461
|
|
|
|
462
|
|
|
def test_minute_window_ends_before_trading_start(self): |
|
|
|
|
463
|
|
|
# entire window is before the trading start |
464
|
|
|
window = self.get_portal().get_history_window( |
465
|
|
|
[2], |
466
|
|
|
pd.Timestamp("2014-02-05 14:35:00", tz='UTC'), |
467
|
|
|
100, |
468
|
|
|
"1m", |
469
|
|
|
"high" |
470
|
|
|
) |
471
|
|
|
|
472
|
|
|
self.assertEqual(len(window), 100) |
473
|
|
|
for i in range(0, 100): |
474
|
|
|
self.assertTrue(np.isnan(window.iloc[i].loc[2])) |
475
|
|
|
|
476
|
|
|
    def test_minute_window_ends_after_trading_end(self):
        """A minute window extending past an asset's last trading minute
        should be NaN-padded at the back.
        """
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        # NOTE(review): index 45 is never checked (range starts at 46) —
        # looks like an off-by-one; confirm before tightening.
        for i in range(46, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))
496
|
|
|
|
497
|
|
|
def test_minute_window_starts_after_trading_end(self): |
|
|
|
|
498
|
|
|
# entire window is after the trading end |
499
|
|
|
window = self.get_portal().get_history_window( |
500
|
|
|
[2], |
501
|
|
|
pd.Timestamp("2014-04-02 14:35:00", tz='UTC'), |
502
|
|
|
100, |
503
|
|
|
"1m", |
504
|
|
|
"high" |
505
|
|
|
) |
506
|
|
|
|
507
|
|
|
self.assertEqual(len(window), 100) |
508
|
|
|
for i in range(0, 100): |
509
|
|
|
self.assertTrue(np.isnan(window.iloc[i].loc[2])) |
510
|
|
|
|
511
|
|
|
    def test_minute_window_starts_before_1_2_2002(self):
        """Minute data starts on 2002-01-02; bars requested before that
        should be NaN and bars after it populated.
        """
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        # NOTE(review): index 45 is never checked (range starts at 46) —
        # looks like an off-by-one; confirm before tightening.
        for i in range(46, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))
526
|
|
|
|
527
|
|
|
    def test_minute_early_close(self):
        """Minute bars spanning an early close plus a subsequent split
        should still match the adjusted reference values.
        """
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data. also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEquals(window.iloc[i].loc[self.IBM], reference[i])
550
|
|
|
|
551
|
|
|
def test_minute_merger(self): |
552
|
|
|
def check(field, ref): |
|
|
|
|
553
|
|
|
window = self.get_portal().get_history_window( |
554
|
|
|
[self.C], |
555
|
|
|
pd.Timestamp("2014-07-16 13:35", tz='UTC'), |
556
|
|
|
10, |
557
|
|
|
"1m", |
558
|
|
|
field |
559
|
|
|
) |
560
|
|
|
|
561
|
|
|
self.assertEqual(len(window), len(ref)) |
562
|
|
|
|
563
|
|
|
for i in range(0, len(ref) - 1): |
564
|
|
|
self.assertEquals(window.iloc[i].loc[self.C], ref[i]) |
565
|
|
|
|
566
|
|
|
open_ref = [71.99, 71.991, 71.992, 71.996, 71.996, |
567
|
|
|
72.000, 72.001, 72.002, 72.004, 72.005] |
568
|
|
|
high_ref = [77.334, 80.196, 80.387, 72.331, 79.184, |
569
|
|
|
75.439, 81.176, 78.564, 80.498, 82.000] |
570
|
|
|
low_ref = [62.621, 70.427, 65.572, 68.357, 63.623, |
571
|
|
|
69.805, 67.245, 64.238, 64.487, 71.864] |
572
|
|
|
close_ref = [69.977, 75.311, 72.979, 70.344, 71.403, |
573
|
|
|
72.622, 74.210, 71.401, 72.492, 73.669] |
574
|
|
|
vol_ref = [12663, 12662, 12661, 12661, 12660, 12661, |
575
|
|
|
12663, 12662, 12663, 12662] |
576
|
|
|
|
577
|
|
|
check("open_price", open_ref) |
578
|
|
|
check("high", high_ref) |
579
|
|
|
check("low", low_ref) |
580
|
|
|
check("close_price", close_ref) |
581
|
|
|
check("price", close_ref) |
582
|
|
|
check("volume", vol_ref) |
583
|
|
|
|
584
|
|
|
    def test_minute_forward_fill(self):
        """Forward-filling happens only when ffill=True AND field='price';
        other fields leave missing minutes as NaN (or 0 for volume).
        """
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                # Missing volume is represented as 0, not NaN.
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        # "price" with the default ffill=True: no NaNs anywhere.
        ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # Expected forward-filled values (each real bar repeated into the
        # following missing minute):
        # 2002-01-02 14:31:00+00:00    126.183
        # 2002-01-02 14:32:00+00:00    126.183
        # 2002-01-02 14:33:00+00:00    125.648
        # 2002-01-02 14:34:00+00:00    125.648
        # 2002-01-02 14:35:00+00:00    126.016
        # 2002-01-02 14:36:00+00:00    126.016
        # 2002-01-02 14:37:00+00:00    127.918
        # 2002-01-02 14:38:00+00:00    127.918
        # 2002-01-02 14:39:00+00:00    126.423
        # 2002-01-02 14:40:00+00:00    126.423
        # 2002-01-02 14:41:00+00:00    129.825
        # 2002-01-02 14:42:00+00:00    129.825
        # 2002-01-02 14:43:00+00:00    125.392
        # 2002-01-02 14:44:00+00:00    125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))
654
|
|
|
|
655
|
|
|
    def test_daily_functionality(self):
        """A 10-bar '1d' window mid-session should return 9 daily bars plus
        one partial bar aggregated from the current day's minutes.
        """
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            # Fetch a 10-day window ending mid-morning 2014-03-21 and check
            # every bar against ``values``.
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)
714
|
|
|
|
715
|
|
|
    def test_daily_splits_with_no_minute_data(self):
        """Daily windows should apply a split even when the current day has
        no minute data; the final (minute-less) bar is ffilled only for
        'price', 0 for 'volume', NaN otherwise.
        """
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        # Write a second adjustments DB so we don't disturb the shared one.
        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            # First five bars come from daily data and must match exactly.
            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            # Sixth bar has no minute data: ffilled for price, else 0/NaN.
            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # Raw fixture rows (pre-adjustment); the reference lists below are
        # the split-adjusted expectations.
        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)
788
|
|
|
|
789
|
|
|
    def test_daily_window_starts_before_trading_start(self):
        """Daily windows reaching before an asset's start date should be
        NaN-padded at the front.
        """
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEquals(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEquals(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))
810
|
|
|
|
811
|
|
|
def test_daily_window_ends_before_trading_start(self): |
812
|
|
|
portal = self.get_portal() |
813
|
|
|
|
814
|
|
|
# MSFT started on 3/3/2014, so try to go before that |
815
|
|
|
window = portal.get_history_window( |
816
|
|
|
[self.MSFT], |
817
|
|
|
pd.Timestamp("2014-02-28 13:35:00", tz='UTC'), |
818
|
|
|
5, |
819
|
|
|
"1d", |
820
|
|
|
"high" |
821
|
|
|
) |
822
|
|
|
|
823
|
|
|
self.assertEqual(len(window), 5) |
824
|
|
|
for i in range(0, 5): |
825
|
|
|
self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT])) |
826
|
|
|
|
827
|
|
|
def test_daily_window_starts_after_trading_end(self): |
828
|
|
|
# MSFT stopped trading EOD Friday 8/29/2014 |
829
|
|
|
window = self.get_portal().get_history_window( |
830
|
|
|
[self.MSFT], |
831
|
|
|
pd.Timestamp("2014-09-12 13:35:00", tz='UTC'), |
832
|
|
|
8, |
833
|
|
|
"1d", |
834
|
|
|
"high", |
835
|
|
|
) |
836
|
|
|
|
837
|
|
|
self.assertEqual(len(window), 8) |
838
|
|
|
for i in range(0, 8): |
839
|
|
|
self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT])) |
840
|
|
|
|
841
|
|
|
def test_daily_window_ends_after_trading_end(self): |
842
|
|
|
# MSFT stopped trading EOD Friday 8/29/2014 |
843
|
|
|
window = self.get_portal().get_history_window( |
844
|
|
|
[self.MSFT], |
845
|
|
|
pd.Timestamp("2014-09-04 13:35:00", tz='UTC'), |
846
|
|
|
10, |
847
|
|
|
"1d", |
848
|
|
|
"high", |
849
|
|
|
) |
850
|
|
|
|
851
|
|
|
# should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4) |
852
|
|
|
# (9/1/2014 is labor day) |
853
|
|
|
self.assertEqual(len(window), 10) |
854
|
|
|
|
855
|
|
|
for i in range(0, 7): |
856
|
|
|
self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT])) |
857
|
|
|
|
858
|
|
|
for i in range(7, 10): |
859
|
|
|
self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT])) |
860
|
|
|
|
861
|
|
|
def test_empty_sid_list(self): |
862
|
|
|
portal = self.get_portal() |
863
|
|
|
|
864
|
|
|
fields = ["open_price", |
865
|
|
|
"close_price", |
866
|
|
|
"high", |
867
|
|
|
"low", |
868
|
|
|
"volume", |
869
|
|
|
"price"] |
870
|
|
|
freqs = ["1m", "1d"] |
871
|
|
|
|
872
|
|
|
for field in fields: |
873
|
|
|
for freq in freqs: |
874
|
|
|
window = portal.get_history_window( |
875
|
|
|
[], |
876
|
|
|
pd.Timestamp("2014-06-11 15:30", tz='UTC'), |
877
|
|
|
6, |
878
|
|
|
freq, |
879
|
|
|
field |
880
|
|
|
) |
881
|
|
|
|
882
|
|
|
self.assertEqual(len(window), 6) |
883
|
|
|
|
884
|
|
|
for i in range(0, 6): |
885
|
|
|
self.assertEqual(len(window.iloc[i]), 0) |
886
|
|
|
|
887
|
|
|
def test_daily_window_starts_before_1_2_2002(self): |
888
|
|
|
|
889
|
|
|
env = TradingEnvironment() |
890
|
|
|
asset_info = make_simple_asset_info( |
891
|
|
|
[self.GS], |
892
|
|
|
Timestamp('1999-05-04'), |
893
|
|
|
Timestamp('2004-08-30'), |
894
|
|
|
['GS'] |
895
|
|
|
) |
896
|
|
|
env.write_data(equities_df=asset_info) |
897
|
|
|
portal = self.get_portal(env=env) |
898
|
|
|
|
899
|
|
|
window = portal.get_history_window( |
900
|
|
|
[self.GS], |
901
|
|
|
pd.Timestamp("2002-01-04 14:35:00", tz='UTC'), |
902
|
|
|
10, |
903
|
|
|
"1d", |
904
|
|
|
"low" |
905
|
|
|
) |
906
|
|
|
|
907
|
|
|
# 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs |
908
|
|
|
# 1/2 and 1/3 should be non-NaN |
909
|
|
|
# 1/4 should be NaN (since we don't have minute data for it) |
910
|
|
|
|
911
|
|
|
self.assertEqual(len(window), 10) |
912
|
|
|
|
913
|
|
|
for i in range(0, 7): |
914
|
|
|
self.assertTrue(np.isnan(window.iloc[i].loc[self.GS])) |
915
|
|
|
|
916
|
|
|
for i in range(8, 9): |
917
|
|
|
self.assertFalse(np.isnan(window.iloc[i].loc[self.GS])) |
918
|
|
|
|
919
|
|
|
self.assertTrue(np.isnan(window.iloc[9].loc[self.GS])) |
920
|
|
|
|
921
|
|
|
def test_minute_window_ends_before_1_2_2002(self): |
922
|
|
|
with self.assertRaises(ValueError): |
923
|
|
|
self.get_portal().get_history_window( |
924
|
|
|
[self.GS], |
925
|
|
|
pd.Timestamp("2001-12-31 14:35:00", tz='UTC'), |
926
|
|
|
50, |
927
|
|
|
"1m", |
928
|
|
|
"close_price" |
929
|
|
|
) |
930
|
|
|
|
931
|
|
|
def test_bad_history_inputs(self): |
932
|
|
|
portal = self.get_portal() |
933
|
|
|
|
934
|
|
|
# bad fieldname |
935
|
|
|
for field in ["foo", "bar", "", "5"]: |
936
|
|
|
with self.assertRaises(ValueError): |
937
|
|
|
portal.get_history_window( |
938
|
|
|
[self.AAPL], |
939
|
|
|
pd.Timestamp("2014-06-11 15:30", tz='UTC'), |
940
|
|
|
6, |
941
|
|
|
"1d", |
942
|
|
|
field |
943
|
|
|
) |
944
|
|
|
|
945
|
|
|
# bad frequency |
946
|
|
|
for freq in ["2m", "30m", "3d", "300d", "", "5"]: |
947
|
|
|
with self.assertRaises(ValueError): |
948
|
|
|
portal.get_history_window( |
949
|
|
|
[self.AAPL], |
950
|
|
|
pd.Timestamp("2014-06-11 15:30", tz='UTC'), |
951
|
|
|
6, |
952
|
|
|
freq, |
953
|
|
|
"volume" |
954
|
|
|
) |
955
|
|
|
|
956
|
|
|
def test_daily_merger(self): |
957
|
|
|
def check(field, ref): |
|
|
|
|
958
|
|
|
window = self.get_portal().get_history_window( |
959
|
|
|
[self.C], |
960
|
|
|
pd.Timestamp("2014-07-17 13:35", tz='UTC'), |
961
|
|
|
4, |
962
|
|
|
"1d", |
963
|
|
|
field |
964
|
|
|
) |
965
|
|
|
|
966
|
|
|
self.assertEqual(len(window), len(ref),) |
967
|
|
|
|
968
|
|
|
for i in range(0, len(ref) - 1): |
969
|
|
|
self.assertEquals(window.iloc[i].loc[self.C], ref[i], i) |
970
|
|
|
|
971
|
|
|
# 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351 |
972
|
|
|
# 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354 |
973
|
|
|
# 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352 |
974
|
|
|
# 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876 |
975
|
|
|
# 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875 |
976
|
|
|
# 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875 |
977
|
|
|
# 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877 |
978
|
|
|
# 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879 |
979
|
|
|
|
980
|
|
|
open_ref = [69.59, 69.6, 69.58, 72.767] |
981
|
|
|
high_ref = [69.57, 69.6, 69.56, 80.146] |
982
|
|
|
low_ref = [69.6, 69.59, 69.57, 63.194] |
983
|
|
|
close_ref = [69.585, 69.595, 69.565, 72.155] |
984
|
|
|
vol_ref = [12351, 12354, 12352, 64382] |
985
|
|
|
|
986
|
|
|
check("open_price", open_ref) |
987
|
|
|
check("high", high_ref) |
988
|
|
|
check("low", low_ref) |
989
|
|
|
check("close_price", close_ref) |
990
|
|
|
check("price", close_ref) |
991
|
|
|
check("volume", vol_ref) |
992
|
|
|
|
993
|
|
|
def test_minute_adjustments_as_of_lookback_date(self): |
|
|
|
|
994
|
|
|
# AAPL has splits on 2014-03-20 and 2014-03-21 |
995
|
|
|
window_0320 = self.get_portal().get_history_window( |
996
|
|
|
[self.AAPL], |
997
|
|
|
pd.Timestamp("2014-03-20 13:35", tz='UTC'), |
998
|
|
|
395, |
999
|
|
|
"1m", |
1000
|
|
|
"open_price" |
1001
|
|
|
) |
1002
|
|
|
|
1003
|
|
|
window_0321 = self.get_portal().get_history_window( |
1004
|
|
|
[self.AAPL], |
1005
|
|
|
pd.Timestamp("2014-03-21 13:35", tz='UTC'), |
1006
|
|
|
785, |
1007
|
|
|
"1m", |
1008
|
|
|
"open_price" |
1009
|
|
|
) |
1010
|
|
|
|
1011
|
|
|
for i in range(0, 395): |
1012
|
|
|
# history on 3/20, since the 3/21 0.5 split hasn't |
1013
|
|
|
# happened yet, should return values 2x larger than history on |
1014
|
|
|
# 3/21 |
1015
|
|
|
self.assertEqual(window_0320.iloc[i].loc[self.AAPL], |
1016
|
|
|
window_0321.iloc[i].loc[self.AAPL] * 2) |
1017
|
|
|
|
1018
|
|
|
def test_daily_adjustments_as_of_lookback_date(self): |
|
|
|
|
1019
|
|
|
window_0402 = self.get_portal().get_history_window( |
1020
|
|
|
[self.IBM], |
1021
|
|
|
pd.Timestamp("2014-04-02 13:35", tz='UTC'), |
1022
|
|
|
23, |
1023
|
|
|
"1d", |
1024
|
|
|
"open_price" |
1025
|
|
|
) |
1026
|
|
|
|
1027
|
|
|
window_0702 = self.get_portal().get_history_window( |
1028
|
|
|
[self.IBM], |
1029
|
|
|
pd.Timestamp("2014-07-02 13:35", tz='UTC'), |
1030
|
|
|
86, |
1031
|
|
|
"1d", |
1032
|
|
|
"open_price" |
1033
|
|
|
) |
1034
|
|
|
|
1035
|
|
|
for i in range(0, 22): |
1036
|
|
|
self.assertEqual(window_0402.iloc[i].loc[self.IBM], |
1037
|
|
|
window_0702.iloc[i].loc[self.IBM] * 2) |
1038
|
|
|
|
1039
|
|
|
    def test_minute_dividends(self):
        # Purpose: a minute window spanning the DIVIDEND sid's ex-date
        # (2014-03-18, $0.98 dividend) should return dividend-adjusted
        # values for bars before the ex-date and raw values after it.
        def check(field, ref):
            # Fetch the 10 minutes ending 2014-03-18 13:35 UTC and
            # compare against `ref` with float tolerance.
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # Raw fixture minute bars (open,high,low,close,volume):
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272
        #
        # The expected values below match raw/1000 for the 3/18 bars and
        # (raw/1000) * 0.98 for the 3/17 bars (e.g. 118.923 * 0.98 ~=
        # 116.545) -- i.e. the dividend ratio applied only before the
        # ex-date. Volume is unadjusted.

        open_ref = [116.545, # 2014-03-17 19:56:00+00:00
                    116.548, # 2014-03-17 19:57:00+00:00
                    116.551, # 2014-03-17 19:58:00+00:00
                    116.553, # 2014-03-17 19:59:00+00:00
                    116.553, # 2014-03-17 20:00:00+00:00
                    116.457, # 2014-03-18 13:31:00+00:00
                    116.461, # 2014-03-18 13:32:00+00:00
                    116.461, # 2014-03-18 13:33:00+00:00
                    116.461, # 2014-03-18 13:34:00+00:00
                    116.464] # 2014-03-18 13:35:00+00:00

        high_ref = [120.764, # 2014-03-17 19:56:00+00:00
                    120.537, # 2014-03-17 19:57:00+00:00
                    126.530, # 2014-03-17 19:58:00+00:00
                    123.624, # 2014-03-17 19:59:00+00:00
                    122.050, # 2014-03-17 20:00:00+00:00
                    120.731, # 2014-03-18 13:31:00+00:00
                    116.520, # 2014-03-18 13:32:00+00:00
                    117.115, # 2014-03-18 13:33:00+00:00
                    119.787, # 2014-03-18 13:34:00+00:00
                    117.221] # 2014-03-18 13:35:00+00:00

        low_ref = [110.196, # 2014-03-17 19:56:00+00:00
                   115.553, # 2014-03-17 19:57:00+00:00
                   108.913, # 2014-03-17 19:58:00+00:00
                   109.870, # 2014-03-17 19:59:00+00:00
                   106.543, # 2014-03-17 20:00:00+00:00
                   114.148, # 2014-03-18 13:31:00+00:00
                   106.572, # 2014-03-18 13:32:00+00:00
                   108.506, # 2014-03-18 13:33:00+00:00
                   108.861, # 2014-03-18 13:34:00+00:00
                   112.698] # 2014-03-18 13:35:00+00:00

        close_ref = [115.480, # 2014-03-17 19:56:00+00:00
                     118.045, # 2014-03-17 19:57:00+00:00
                     117.722, # 2014-03-17 19:58:00+00:00
                     116.746, # 2014-03-17 19:59:00+00:00
                     114.295, # 2014-03-17 20:00:00+00:00
                     117.439, # 2014-03-18 13:31:00+00:00
                     111.546, # 2014-03-18 13:32:00+00:00
                     112.810, # 2014-03-18 13:33:00+00:00
                     114.323, # 2014-03-18 13:34:00+00:00
                     114.960] # 2014-03-18 13:35:00+00:00

        volume_ref = [2273, # 2014-03-17 19:56:00+00:00
                      2274, # 2014-03-17 19:57:00+00:00
                      2274, # 2014-03-17 19:58:00+00:00
                      2276, # 2014-03-17 19:59:00+00:00
                      2275, # 2014-03-17 20:00:00+00:00
                      2274, # 2014-03-18 13:31:00+00:00
                      2275, # 2014-03-18 13:32:00+00:00
                      2274, # 2014-03-18 13:33:00+00:00
                      2273, # 2014-03-18 13:34:00+00:00
                      2272] # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        # "price" is forward-filled close, identical here
        check("price", close_ref)
        check("volume", volume_ref)
1126
|
|
|
|
1127
|
|
|
    def test_daily_dividends(self):
        # Purpose: a daily window spanning the DIVIDEND sid's ex-date
        # should return dividend-adjusted values for the days before the
        # ex-date and unadjusted values from the ex-date onward; the
        # final (partial) day is aggregated from minute bars.
        def check(field, ref):
            # Fetch 6 daily bars ending mid-day 2014-03-21 and compare
            # against `ref` with float tolerance.
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # Raw fixture bars (open,high,low,close,volume); the last day is
        # built from the 13:31-13:35 minute bars on 3/21:
        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108, # 2014-03-14 00:00:00+00:00
                    100.111, # 2014-03-17 00:00:00+00:00
                    100.026, # 2014-03-18 00:00:00+00:00
                    100.030, # 2014-03-19 00:00:00+00:00
                    100.032, # 2014-03-20 00:00:00+00:00
                    114.098] # 2014-03-21 00:00:00+00:00

        high_ref = [100.221, # 2014-03-14 00:00:00+00:00
                    103.725, # 2014-03-17 00:00:00+00:00
                    106.455, # 2014-03-18 00:00:00+00:00
                    102.803, # 2014-03-19 00:00:00+00:00
                    102.988, # 2014-03-20 00:00:00+00:00
                    123.773] # 2014-03-21 00:00:00+00:00

        low_ref = [97.370, # 2014-03-14 00:00:00+00:00
                   93.964, # 2014-03-17 00:00:00+00:00
                   91.528, # 2014-03-18 00:00:00+00:00
                   98.510, # 2014-03-19 00:00:00+00:00
                   92.179, # 2014-03-20 00:00:00+00:00
                   105.353] # 2014-03-21 00:00:00+00:00

        close_ref = [98.795, # 2014-03-14 00:00:00+00:00
                     98.844, # 2014-03-17 00:00:00+00:00
                     98.991, # 2014-03-18 00:00:00+00:00
                     100.657, # 2014-03-19 00:00:00+00:00
                     97.584, # 2014-03-20 00:00:00+00:00
                     115.771] # 2014-03-21 00:00:00+00:00

        # the 3/21 volume (14333) is the sum of that day's minute bars
        volume_ref = [950, # 2014-03-14 00:00:00+00:00
                      950, # 2014-03-17 00:00:00+00:00
                      972, # 2014-03-18 00:00:00+00:00
                      973, # 2014-03-19 00:00:00+00:00
                      1016, # 2014-03-20 00:00:00+00:00
                      14333] # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        # "price" is forward-filled close, identical here
        check("price", close_ref)
        check("volume", volume_ref)
1193
|
|
|
|
1194
|
|
|
    # Each (field, offset) pair reflects how the futures fixture encodes
    # its values: a bar's value for a field is that field's offset plus
    # the bar's ordinal position since the contract's first minute.
    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        """Minute futures history should return the right market minutes
        (respecting early closes) and the fixture's offset-encoded
        values for each field."""
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dt. Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right: walk backwards 1000 market
        # minutes from the end, then reverse into chronological order
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # expected market minutes covered by the window:
        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00 # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        # a 24/7 calendar-minute range from the contract start; a bar's
        # position in this range is its ordinal in the fixture data
        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        # each bar's value should be the field offset plus the bar's
        # ordinal (found via searchsorted into the 24/7 minute range)
        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])
1249
|
|
|
|
1250
|
|
|
def test_history_minute_blended(self): |
1251
|
|
|
window = self.get_portal().get_history_window( |
1252
|
|
|
[self.FUTURE_ASSET2, self.AAPL], |
1253
|
|
|
pd.Timestamp("2014-03-21 20:00", tz='UTC'), |
1254
|
|
|
200, |
1255
|
|
|
"1m", |
1256
|
|
|
"price" |
1257
|
|
|
) |
1258
|
|
|
|
1259
|
|
|
# just a sanity check |
1260
|
|
|
self.assertEqual(200, len(window[self.AAPL])) |
1261
|
|
|
self.assertEqual(200, len(window[self.FUTURE_ASSET2])) |
1262
|
|
|
|
1263
|
|
|
def test_futures_history_daily(self): |
1264
|
|
|
# get 3 days ending 11/30 10:00 am Eastern |
1265
|
|
|
# = 11/25, 11/27 (half day), 11/30 (partial) |
1266
|
|
|
|
1267
|
|
|
window = self.get_portal().get_history_window( |
1268
|
|
|
[self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)], |
1269
|
|
|
pd.Timestamp("2015-11-30 15:00", tz='UTC'), |
1270
|
|
|
3, |
1271
|
|
|
"1d", |
1272
|
|
|
"high" |
1273
|
|
|
) |
1274
|
|
|
|
1275
|
|
|
self.assertEqual(3, len(window[self.FUTURE_ASSET])) |
1276
|
|
|
|
1277
|
|
|
np.testing.assert_array_equal([12929.0, 15629.0, 19769.0], |
1278
|
|
|
window.values.T[0]) |
1279
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.