| Metric | Value |
| --- | --- |
| Total Complexity | 98 |
| Total Lines | 1375 |
| Duplicated Lines | 0% |
Complex classes like `tests.HistoryTestCase` often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common way to find such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined which fields belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often the faster option.
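
In the class reported below, for instance, the `FUTURE_ASSET`, `FUTURE_ASSET2`, and `FUTURE_ASSET3` sids, the `futures_start_dates` dict, and `create_fake_futures_minute_data` all share a futures prefix and change together. A minimal sketch of extracting them, assuming the existing minute-data helper is injected as a callable (the `FuturesFixture` name and its `add` method are hypothetical, not part of the codebase):

```python
from datetime import timedelta


class FuturesFixture(object):
    """Hypothetical Extract Class target for the futures-prefixed members
    of HistoryTestCase (sids, start dates, fake minute data)."""

    def __init__(self, env, tempdir, write_minutes):
        # write_minutes: the existing create_fake_futures_minute_data
        # helper, passed in so this sketch stands alone.
        self.env = env
        self.tempdir = tempdir
        self.write_minutes = write_minutes
        self.start_dates = {}

    def add(self, sid, start_dt, n_minutes, **gap_kwargs):
        # Registers one future and builds its fake minute bars; replaces
        # the three near-identical blocks in setUpClass below.
        self.start_dates[sid] = start_dt
        asset = self.env.asset_finder.retrieve_asset(sid)
        self.write_minutes(self.tempdir, asset, start_dt,
                           start_dt + timedelta(minutes=n_minutes),
                           **gap_kwargs)
```

With that in place, `setUpClass` would shrink to one `FuturesFixture(...)` construction plus three one-line `add(...)` calls, and tests would read start dates from `cls.futures.start_dates`. For reference, the flagged code is reproduced below.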
```python
from os.path import dirname, join, realpath

# ... (lines 2-59 omitted) ...

class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.FUTURE_ASSET3 = 12
        cls.FOO = 13
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID, cls.FOO]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID', 'FOO']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET3: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE3',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC'),
                cls.FUTURE_ASSET3: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            # build data for FUTURE_ASSET3 from 2014-03-19 13:31 to
            # 2014-03-21 20:00.
            # Pause trading between 2014-03-20 14:00 and 2014-03-20 15:00
            gap_start = pd.Timestamp('2014-03-20 14:00', tz='UTC')
            gap_end = pd.Timestamp('2014-03-20 15:00', tz='UTC')
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET3),
                cls.futures_start_dates[cls.FUTURE_ASSET3],
                cls.futures_start_dates[cls.FUTURE_ASSET3] +
                timedelta(minutes=3270),
                gap_start_dt=gap_start,
                gap_end_dt=gap_end,
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2002-03-21"),
                 'ratio': 0.5,
                 'sid': cls.FOO},
            ],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)

            cls.data_portal = cls.get_portal(
                daily_equities_filename="test_daily_data.bcolz",
                adjustments_filename="adjustments.sqlite"
            )
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt,
                                        gap_start_dt=None, gap_end_dt=None):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        if gap_start_dt and gap_end_dt:
            minutes = pd.date_range(normalize_date(start_dt), end_dt, freq='T')
            gap_start_ix = minutes.get_loc(gap_start_dt)
            gap_end_ix = minutes.get_loc(gap_end_dt)
            future_df.iloc[gap_start_ix:gap_end_ix, :] = 0

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
            join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
            cls.FOO: join(TEST_MINUTE_RESOURCE_PATH,
                          "FOO_minute.csv.gz"),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
            equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz'),
            cls.FOO: join(TEST_MINUTE_RESOURCE_PATH, 'FOO_daily.csv.gz'),
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
            )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

    @classmethod
    def get_portal(cls,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = cls.env

        temp_path = cls.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.data_portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEquals(window.loc[day1_end, 1], 533.086)
        self.assertEquals(window.loc[day2_start, 1], 533.087)
        self.assertEquals(window.loc[day2_end, 1], 533.853)
        self.assertEquals(window.loc[day3_start, 1], 533.854)

    def test_ffill_minute_equity_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading nan if there is
        data available before the start of the window.
        """

        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-21 13:41:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # The previous value is on 2014-03-20, and there is a split between
        # the two dates; the spot price of the latest value is 1066.92, with
        # the expected result being 533.46 after the 2:1 split is applied.
        expected = np.append(np.full(19, 533.460),
                             np.array(529.601))

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_ffill_minute_equity_window_no_previous(self):
        """
        Test that forward filling handles the case where the window starts
        with a nan, and there are no previous values.
        """

        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-19 13:41:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # There should be no values, since there is no data before 2014-03-20
        expected = np.full(20, np.nan)

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_ffill_minute_future_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading nan if there is
        data available before the start of the window.
        """

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET3],
            pd.Timestamp("2014-03-20 15:00:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # 31468 is the value at 2014-03-20 13:59, and should be the forward
        # filled value until 2014-03-20 15:00
        expected = np.append(np.full(19, 31468),
                             np.array(31529))

        np.testing.assert_allclose(window.loc[:, self.FUTURE_ASSET3],
                                   expected)

    def test_ffill_daily_equity_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading nan if there is
        data available before the start of the window.
        """
        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-21 00:00:00+00:00", tz='UTC'),
            2,
            "1d",
            "price"
        )

        # The previous value is on 2014-03-20, and there is a split between
        # the two dates; the spot price of the latest value is 106.692, with
        # the expected result being 53.346 after the 2:1 split is applied.
        expected = np.array([
            53.346,
            52.95,
        ])

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.data_portal

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 4):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
        # entire window is before the trading start
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs, as MSFT stopped trading at the
        # end of the day on 2014-03-21 (and the 22nd and 23rd are a weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(46, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
        # entire window is after the trading end
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.data_portal.get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(46, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data. also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEquals(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref) - 1):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.data_portal.get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.data_portal.get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

    def test_daily_window_starts_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEquals(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEquals(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2-9/4)
        # (9/1/2014 is Labor Day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.data_portal

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):

        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(8, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.data_portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.data_portal

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref) - 1):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
        window_0402 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dates[self.FUTURE_ASSET]. Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])

    def test_history_minute_blended(self):
        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.data_portal.get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])
```
Duplicated code is one of the most pungent code smells. If the same code appears in three or more different places, we strongly encourage you to extract it into a single class or operation, as sketched below.
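
In the listing above, for instance, `test_minute_merger`, `test_daily_merger`, `test_minute_dividends`, and `test_daily_dividends` each define a near-identical `check(field, ref)` closure, and `test_daily_functionality` repeats the same shape in `run_query`. A minimal sketch of one shared helper, assuming single-sid reference lists as in those tests (the name `assert_history_values` is illustrative, not part of the codebase):

```python
import numpy as np


def assert_history_values(test, portal, sids, end_dt, bar_count, freq,
                          expected_by_field):
    # The shared skeleton of the duplicated check()/run_query() closures:
    # fetch one history window per field and compare it against the
    # expected reference values for the first (only) sid.
    for field, expected in expected_by_field.items():
        window = portal.get_history_window(sids, end_dt, bar_count, freq,
                                           field)
        test.assertEqual(len(window), len(expected))
        np.testing.assert_allclose(window.loc[:, sids[0]], expected)
```

Each test would then reduce to building its `expected_by_field` dict and making a single call, which would also eliminate the `range(0, len(ref) - 1)` loops that currently skip the last reference value.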
You can also find more detailed suggestions in the “Code” section of your repository.