- Total Complexity: 92
- Total Lines: 1251
- Duplicated Lines: 0 %
Complex classes like tests.HistoryTestCase often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined which fields and methods belong together, you can apply the Extract Class refactoring, as sketched below. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often the faster refactoring.
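In this class, the `create_fake_*` class methods are one such component: they share a prefix and all write fixture data into the temp directory. A minimal sketch of what Extract Class could look like here (the `HistoryFixtureWriter` name and its interface are hypothetical, not part of the codebase):

```python
# Hypothetical sketch of Extract Class applied to HistoryTestCase:
# the create_fake_* methods share a prefix and all write fixture data
# into one temp directory, so they can move to a dedicated class.
class HistoryFixtureWriter(object):
    """Owns the temp directory and all fake-data creation."""

    def __init__(self, tempdir, env, assets):
        self.tempdir = tempdir
        self.env = env
        self.assets = assets

    def create_fake_minute_data(self):
        ...  # body moved unchanged from HistoryTestCase

    def create_fake_daily_data(self):
        ...

    def create_fake_adjustments(self, filename,
                                splits=None, mergers=None, dividends=None):
        ...


# setUpClass then shrinks to orchestration, e.g.:
#   cls.fixtures = HistoryFixtureWriter(cls.tempdir, cls.env, cls.assets)
#   cls.fixtures.create_fake_minute_data()
#   cls.fixtures.create_fake_adjustments("adjustments.sqlite", splits=splits)
```

For reference, the flagged class as reported: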
```python
from os.path import dirname, join, realpath

# ... remaining imports elided in this report ...


class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    # minute data for this asset is generated below for
                    # 2015-11-23 through 2015-11-30
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2015-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)

            cls.data_portal = cls.get_portal(
                daily_equities_filename="test_daily_data.bcolz",
                adjustments_filename="adjustments.sqlite"
            )
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
                join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
            equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
            )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

    @classmethod
    def get_portal(cls,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = cls.env

        temp_path = cls.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.data_portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEquals(window.loc[day1_end, 1], 533.086)
        self.assertEquals(window.loc[day2_start, 1], 533.087)
        self.assertEquals(window.loc[day2_end, 1], 533.853)
        self.assertEquals(window.loc[day3_start, 1], 533.854)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.data_portal

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 5):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
        # entire window is before the trading start
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd are a weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
        # entire window is after the trading end
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.data_portal.get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data. also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEquals(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref)):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.data_portal.get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00    126.183
        # 2002-01-02 14:32:00+00:00    126.183
        # 2002-01-02 14:33:00+00:00    125.648
        # 2002-01-02 14:34:00+00:00    125.648
        # 2002-01-02 14:35:00+00:00    126.016
        # 2002-01-02 14:36:00+00:00    126.016
        # 2002-01-02 14:37:00+00:00    127.918
        # 2002-01-02 14:38:00+00:00    127.918
        # 2002-01-02 14:39:00+00:00    126.423
        # 2002-01-02 14:40:00+00:00    126.423
        # 2002-01-02 14:41:00+00:00    129.825
        # 2002-01-02 14:42:00+00:00    129.825
        # 2002-01-02 14:43:00+00:00    125.392
        # 2002-01-02 14:44:00+00:00    125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.data_portal.get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

    def test_daily_window_starts_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEquals(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEquals(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2-9/4)
        # (9/1/2014 is Labor Day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.data_portal

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):
        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS; minute data starts in 2002.
            pd.Timestamp("2002-01-04 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.data_portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.data_portal

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref)):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
        window_0402 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has a dividend with ex-date 2014-03-18
        # (adjustment ratio 0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data for self.FUTURE_ASSET is 10,000 bars starting at
        # self.futures_start_dates[self.FUTURE_ASSET]. Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])

    def test_history_minute_blended(self):
        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.data_portal.get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])
```
Duplicated code is one of the most pungent code smells. If you find yourself repeating the same code in three or more places, we strongly encourage you to extract it into a single class or operation, as in the sketch below.
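The merger and dividend tests above are a case in point: each defines a local `check(field, ref)` closure that repeats the same fetch-and-compare pattern. A minimal sketch of extracting that pattern into one shared helper (the `assert_window_matches` name is illustrative, not part of the codebase):

```python
import numpy as np


def assert_window_matches(test, portal, sid, end_dt, bar_count,
                          frequency, field, expected):
    """Fetch one history window and compare a single sid's values
    against the expected reference list."""
    window = portal.get_history_window([sid], end_dt, bar_count,
                                       frequency, field)
    test.assertEqual(len(window), len(expected))
    np.testing.assert_allclose(window.loc[:, sid], expected)
```

test_minute_merger, test_daily_merger, test_minute_dividends, and test_daily_dividends could then each collapse into a handful of assert_window_matches calls with their own reference data.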
You can also find more detailed suggestions in the “Code” section of your repository.