Completed
Pull Request — master (#858)
by Eddie
05:34 queued 02:25
created

tests.TestHistoryIndex   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 21
Duplicated Lines 0%
Metric Value
dl 0
loc 21
rs 10
wmc 4
1
from os.path import dirname, join, realpath
2
from textwrap import dedent
3
from unittest import TestCase
4
import bcolz
5
from datetime import timedelta
6
from nose_parameterized import parameterized
7
from pandas.tslib import normalize_date
8
from testfixtures import TempDirectory
9
import numpy as np
10
from numpy import array
11
import pandas as pd
12
from pandas import (
13
    read_csv,
14
    Timestamp,
15
    DataFrame, DatetimeIndex)
16
17
from six import iteritems
18
from zipline import TradingAlgorithm
19
20
from zipline.data.data_portal import DataPortal
21
from zipline.data.us_equity_pricing import (
22
    DailyBarWriterFromCSVs,
23
    SQLiteAdjustmentWriter,
24
    SQLiteAdjustmentReader,
25
)
26
from zipline.errors import HistoryInInitialize
27
from zipline.utils.test_utils import (
28
    make_simple_asset_info,
29
    str_to_seconds,
30
    MockDailyBarReader
31
)
32
from zipline.data.minute_writer import MinuteBarWriterFromCSVs
33
from zipline.utils.tradingcalendar import trading_days
34
from zipline.finance.trading import (
35
    TradingEnvironment,
36
    SimulationParameters
37
)
38
39
# Both fixture directories live under the same <repo>/tests/resources base;
# compute it once instead of repeating the dirname(dirname(...)) dance.
_TEST_RESOURCE_BASE = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
)

# Minute-bar CSV fixtures used by the minute-history tests.
TEST_MINUTE_RESOURCE_PATH = join(_TEST_RESOURCE_BASE, 'history_inputs')

# Daily-bar CSV fixtures (shared with the pipeline tests).
TEST_DAILY_RESOURCE_PATH = join(_TEST_RESOURCE_BASE, 'pipeline_inputs')
52
53
54
class HistoryTestCase(TestCase):
55
    @classmethod
    def setUpClass(cls):
        """Build the fixtures shared by every test in this class.

        Creates the sid constants, a ``TradingEnvironment`` populated with
        equity and futures metadata, and a temp directory holding fake
        minute bars, daily bars, and an adjustments (splits/mergers/
        dividends) database.  The temp directory is removed in
        ``tearDownClass``, or immediately if fixture creation fails.
        """
        # Sids used throughout the tests; cls.assets covers the equities
        # only (the two futures sids are handled separately below).
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        # All equities share the same lifetime window.
        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    # NOTE(review): this end_date precedes the start_date --
                    # possibly a typo for 2015-12-01; confirm before relying
                    # on this future's date range.
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        # Anything past this point writes into the temp dir; clean it up
        # on failure since tearDownClass won't run if setUpClass raises.
        try:
            cls.create_fake_minute_data(cls.tempdir)

            # First minute bar written for each future sid.
            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            cls.create_fake_futures_minute_data(
                cls.tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                cls.tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            # Splits exercised by the minute/daily adjustment tests:
            # AAPL splits on consecutive days (3/20, 3/21), IBM three times.
            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            # Single merger for C, used by test_minute_merger.
            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            # Two cash dividends for DIVIDEND_SID, ex-dates 3/18 and 3/20.
            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            cls.tempdir.cleanup()
            raise
194
195
    @classmethod
    def tearDownClass(cls):
        """Remove the temp directory populated by ``setUpClass``."""
        cls.tempdir.cleanup()
198
199
    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        """Write a synthetic minute-bar bcolz table for ``asset``.

        Bars between ``start_dt`` and ``end_dt`` take sequential values
        (per-field base offsets, OHLC scaled by 1000); minutes between
        midnight and ``start_dt`` are zero-padded so positional offsets
        line up with minutes-since-midnight.
        """
        minutes_in_window = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        pad_count = int(
            (start_dt - normalize_date(start_dt)).total_seconds() / 60)
        padding = [0] * pad_count

        def sequential_column(base, scale):
            # padding zeros, then base, base+1, ... scaled like the OHLC data
            values = padding + list(range(base, base + minutes_in_window))
            return np.array(values) * scale

        frame = pd.DataFrame({
            "open": sequential_column(0, 1000),
            "high": sequential_column(10000, 1000),
            "low": sequential_column(20000, 1000),
            "close": sequential_column(30000, 1000),
            "volume": sequential_column(40000, 1),
        })

        rootdir = join(tempdir.path, "{0}.bcolz".format(asset.sid))
        table = bcolz.ctable.fromdataframe(frame, rootdir=rootdir)

        # Record the bar range on the table as epoch seconds.
        table.attrs["start_dt"] = start_dt.value / 1e9
        table.attrs["last_dt"] = end_dt.value / 1e9
227
228
    @classmethod
    def create_fake_minute_data(cls, tempdir):
        """Write the canned per-sid minute CSV fixtures into ``tempdir``."""
        csv_names = {
            cls.AAPL: 'AAPL_minute.csv.gz',
            cls.MSFT: 'MSFT_minute.csv.gz',
            cls.DELL: 'DELL_minute.csv.gz',
            cls.TSLA: 'TSLA_minute.csv.gz',
            cls.BRKA: 'BRKA_minute.csv.gz',
            cls.IBM: 'IBM_minute.csv.gz',
            # GS has no dedicated minute fixture; reuse IBM's (unused).
            cls.GS: 'IBM_minute.csv.gz',
            cls.C: 'C_minute.csv.gz',
            cls.DIVIDEND_SID: 'DIVIDEND_minute.csv.gz',
        }
        resources = {
            sid: join(TEST_MINUTE_RESOURCE_PATH, name)
            for sid, name in csv_names.items()
        }
        MinuteBarWriterFromCSVs(resources).write(tempdir.path, cls.assets)
245
246
    @classmethod
    def create_fake_daily_data(cls, tempdir):
        """Write the daily-bar fixtures to a bcolz store in ``tempdir``."""
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        # NOTE(review): dead code removed -- the previous version also
        # loaded every CSV with read_csv and set a 'price' column on the
        # in-memory frames, but those frames were discarded;
        # DailyBarWriterFromCSVs reads the CSV files itself.
        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)
270
271
    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        """Write a SQLite adjustments DB named ``filename`` in ``tempdir``.

        Any of ``splits``/``mergers``/``dividends`` left as None is replaced
        by an empty, correctly-typed DataFrame before writing.
        """
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        def empty_ratio_frame():
            # Splits and mergers share one schema; build both empties here.
            # Hackery to make the dtypes correct on an empty frame.
            return DataFrame(
                {
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = empty_ratio_frame()

        if mergers is None:
            mergers = empty_ratio_frame()

        writer.write(splits, mergers, dividends)
319
320
    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):
        """Build a DataPortal wired to the fixture data in ``self.tempdir``.

        The daily store and adjustments DB can be swapped out by name;
        ``env`` defaults to the class-level TradingEnvironment.
        """
        env = env if env is not None else self.env
        root = self.tempdir.path

        reader = SQLiteAdjustmentReader(join(root, adjustments_filename))

        return DataPortal(
            env,
            minutes_equities_path=root,
            daily_equities_path=join(root, daily_equities_filename),
            adjustment_reader=reader
        )
339
340
    def test_history_in_initialize(self):
        """Calling history() inside initialize() raises HistoryInInitialize."""
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        sim_params = SimulationParameters(
            period_start=pd.Timestamp('2007-04-05', tz='UTC'),
            period_end=pd.Timestamp('2007-04-10', tz='UTC'),
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            algo.initialize()
374
375
    def test_minute_basic_functionality(self):
        """A 5-bar 1m open_price window matches the known fixture values."""
        # get a 5-bar minute history from the very end of the available data
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        # BUG FIX: range(0, 4) never checked the fifth reference bar.
        for i in range(5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])
389
390
    def test_minute_splits(self):
        """Minute history spanning AAPL's two 0.5 splits is split-adjusted."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5).

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        # FIX: assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(window.loc[day1_end, 1], 533.086)
        self.assertEqual(window.loc[day2_start, 1], 533.087)
        self.assertEqual(window.loc[day2_end, 1], 533.853)
        self.assertEqual(window.loc[day3_start, 1], 533.854)
415
416
    def test_minute_window_starts_before_trading_start(self):
        """Bars requested from before a sid's first trade come back NaN."""
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        # BUG FIX: range(0, 4) never checked the fifth reference bar.
        for i in range(5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        # BUG FIX: range(0, 4) skipped the last reference bar here too;
        # also assertEquals -> assertEqual (deprecated alias).
        for i in range(5):
            self.assertEqual(window2.iloc[-5 + i].loc[1],
                             reference2[1][i])
            self.assertEqual(window2.iloc[-5 + i].loc[2],
                             reference2[2][i])
461
462
    def test_minute_window_ends_before_trading_start(self):
        """A window that ends before the sid's first trade is all NaN."""
        # entire window is before the trading start
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for row in range(100):
            self.assertTrue(np.isnan(window.iloc[row].loc[2]))
475
476
    def test_minute_window_ends_after_trading_end(self):
        """Bars after the sid's last trade come back NaN."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        # BUG FIX: range(46, 50) skipped index 45, the first of the 5 NaN
        # bars described in the comment above.
        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))
496
497
    def test_minute_window_starts_after_trading_end(self):
        """A window that starts after the sid's last trade is all NaN."""
        # entire window is after the trading end
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for row in range(100):
            self.assertTrue(np.isnan(window.iloc[row].loc[2]))
510
511
    def test_minute_window_starts_before_1_2_2002(self):
        """Minute bars before minute data begins (2002-01-02) are NaN."""
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        # the first 45 bars fall before minute data exists
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        # BUG FIX: range(46, 50) skipped index 45, the first populated bar
        # (14:31 through 14:35 are the five real bars in this window).
        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))
526
527
    def test_minute_early_close(self):
        """History across an early close (7/3) plus a subsequent IBM split."""
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        # FIX: assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
        for i in range(0, 20):
            self.assertAlmostEqual(window.iloc[i].loc[self.IBM], reference[i])
550
551
    def test_minute_merger(self):
        """Minute history spanning C's 2014-07-16 merger is merger-adjusted."""
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # BUG FIX: range(0, len(ref) - 1) never checked the final bar;
            # also assertEquals -> assertEqual (deprecated alias).
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
583
584
    def test_minute_forward_fill(self):
        """Forward-filling applies only when ffill=True AND field='price'.

        Uses TSLA (sid 4), whose fixture minute data is missing alternating
        bars right after the 2002-01-02 open, to check all three cases:
        no-ffill fields, price with ffill, and price with ffill=False.
        """
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        # Case 1: non-price fields never forward-fill; missing bars are
        # 0 for volume and NaN for everything else.
        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        # Case 2: 'price' with the default ffill=True has no NaNs at all.
        ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # Expected filled values (each real bar repeated into the gap):
        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        # Case 3: every other bar (the missing ones) stays NaN.
        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))
654
655
    def test_daily_functionality(self):
        """1d history aggregates today's minute bars into a partial daily
        bar: first open, last close, max high, min low, summed volume."""
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            # FIX: assertEquals is a deprecated alias; use assertEqual.
            for i in range(0, 10):
                self.assertEqual(window.iloc[i].loc[self.BRKA],
                                 values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)
714
715
    def test_daily_splits_with_no_minute_data(self):
        """Daily history handles a split when the current day has no
        minute data: prior days are split-adjusted and the last bar is
        ffilled for price, 0 for volume, NaN otherwise."""
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            # FIX: assertEquals is a deprecated alias; use assertEqual.
            for i in range(0, 5):
                self.assertEqual(window.iloc[i].loc[self.AAPL],
                                 reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)
788
789
    def test_daily_window_starts_before_trading_start(self):
        """A daily window reaching back before the asset's first trade
        date is NaN-padded at the front."""
        # MSFT's first trading day is 3/3/2014; request days before it.
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # Two empty days before the start date, real bars for 3/3 and
        # 3/4, then an empty day because there is no minute data for 3/5.
        msft_values = window[self.MSFT]
        self.assertTrue(np.isnan(msft_values.iloc[0]))
        self.assertTrue(np.isnan(msft_values.iloc[1]))
        self.assertEqual(msft_values.iloc[2], 38.130)
        self.assertEqual(msft_values.iloc[3], 38.48)
        self.assertTrue(np.isnan(msft_values.iloc[4]))
    def test_daily_window_ends_before_trading_start(self):
        """A daily window entirely before the asset's first trade date
        is all NaNs."""
        # MSFT started trading on 3/3/2014; this window ends 2/28.
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for value in window[self.MSFT]:
            self.assertTrue(np.isnan(value))
    def test_daily_window_starts_after_trading_end(self):
        """A daily window entirely after the asset's last trade date is
        all NaNs."""
        # MSFT stopped trading at end-of-day Friday 8/29/2014.
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for value in window[self.MSFT]:
            self.assertTrue(np.isnan(value))
    def test_daily_window_ends_after_trading_end(self):
        """A daily window straddling the asset's last trade date has
        real bars through that date and NaNs afterwards."""
        # MSFT stopped trading at end-of-day Friday 8/29/2014.
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 10)

        # 7 real bars (8/21-8/22, 8/25-8/29) followed by 3 NaNs
        # (9/2-9/4; 9/1/2014 is Labor Day).
        for idx, value in enumerate(window[self.MSFT]):
            if idx < 7:
                self.assertFalse(np.isnan(value))
            else:
                self.assertTrue(np.isnan(value))
    def test_empty_sid_list(self):
        """History with an empty sid list returns a frame with the
        requested number of rows and zero columns, for every field and
        frequency."""
        portal = self.get_portal()

        fields = ("open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price")

        for freq in ("1m", "1d"):
            for field in fields:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                # every row exists but holds no columns
                for row_idx in range(6):
                    self.assertEqual(len(window.iloc[row_idx]), 0)
    def test_daily_window_starts_before_1_2_2002(self):
        """Daily history reaching back before 2002-01-02 (the start of
        available data) should NaN-pad the earlier days.

        GS's asset lifetime starts in 1999, but the portal has no data
        before 2002-01-02, so the seven window days before 1/2 are NaN,
        1/2 and 1/3 hold real values, and 1/4 is NaN because there is no
        minute data for it.
        """
        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-05-04'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            pd.Timestamp("2002-01-04 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        # indices 0-6: the seven pre-2002 sessions
        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        # indices 7 and 8: 1/2 and 1/3.  (Was `range(8, 9)`, an
        # off-by-one that skipped index 7 and never checked 1/2 at all.)
        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))
    def test_minute_window_ends_before_1_2_2002(self):
        """Requesting minute history from before 2002-01-02 raises
        ValueError."""
        portal = self.get_portal()
        with self.assertRaises(ValueError):
            portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )
    def test_bad_history_inputs(self):
        """Unknown field names and unsupported frequencies both raise
        ValueError."""
        portal = self.get_portal()

        def expect_value_error(field, freq):
            # shared driver: one history call that must be rejected
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

        # bad fieldname
        for bad_field in ("foo", "bar", "", "5"):
            expect_value_error(bad_field, "1d")

        # bad frequency
        for bad_freq in ("2m", "30m", "3d", "300d", "", "5"):
            expect_value_error("volume", bad_freq)
    def test_daily_merger(self):
        """Daily history for C through its 2014-07-16 merger.

        Bars before the merger come back merger-adjusted (the raw daily
        values are halved), and the final (partial) 7/17 bar is
        aggregated from that morning's minute data.
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # Compare every bar, including the last one.  (The original
            # loop ran `range(0, len(ref) - 1)` — an off-by-one that
            # never verified the partial merger-day bar, even though the
            # reference lists below include it: e.g. vol_ref[3] = 64382
            # is exactly the sum of the five minute volumes.)
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # raw input bars:
        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
    def test_minute_adjustments_as_of_lookback_date(self):
        """Minute windows are adjusted as of the window's end date, not
        as of the lookback start."""
        # AAPL has splits on 2014-03-20 and 2014-03-21.
        window_0320 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        # As of 3/20, the 0.5 split on 3/21 hasn't happened yet, so each
        # overlapping minute in the 3/20 window should be exactly double
        # its counterpart in the 3/21 window.
        values_0320 = window_0320[self.AAPL]
        values_0321 = window_0321[self.AAPL]
        for minute_idx in range(395):
            self.assertEqual(values_0320.iloc[minute_idx],
                             values_0321.iloc[minute_idx] * 2)
    def test_daily_adjustments_as_of_lookback_date(self):
        """Daily windows are adjusted as of the window's end date."""
        window_0402 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        # The first 22 positions of both windows cover the same sessions
        # (presumably a 2:1 adjustment falls between 4/2 and 7/2 —
        # fixture-defined), so values as of 4/2 are double the 7/2 ones.
        ibm_0402 = window_0402[self.IBM]
        ibm_0702 = window_0702[self.IBM]
        for day_idx in range(22):
            self.assertEqual(ibm_0402.iloc[day_idx],
                             ibm_0702.iloc[day_idx] * 2)
    def test_minute_dividends(self):
        """Minute history across a cash dividend's ex-date.

        The window's 10 minutes straddle the 3/17 close and the 3/18
        open; the dividend goes ex on 3/18, so the 3/17 bars come back
        dividend-adjusted while the 3/18 bars are unadjusted.  Volume is
        never adjusted for dividends.
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # allclose, not equality: adjusted values are floats
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # raw minute bars (open,high,low,close,volume; prices x1000):
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
    def test_daily_dividends(self):
        """Daily history across a cash dividend's ex-date.

        The 6-day window ends mid-day 3/21; bars before the ex-date come
        back dividend-adjusted, and the trailing partial 3/21 bar is
        aggregated from that morning's minute data.
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # allclose, not equality: adjusted values are floats
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # raw input bars (daily through 3/20, then 3/21 minutes):
        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        """Minute history for a future skips closed-market minutes.

        The fixture data is 10,000 consecutive minute bars starting at
        self.futures_start_dates[self.FUTURE_ASSET], written 24/7, with
        each field's value equal to (minute index + per-field offset).
        A 1000-bar window must contain only market minutes and map each
        returned minute back to the right fixture value.
        """
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dt.  Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right: the index must equal the last
        # 1000 market minutes, in chronological order
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # expected market-minute sequence in the window:
        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        # every wall-clock minute of the fixture range; searchsorted
        # recovers each bar's position (= its fixture value sans offset)
        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])
    def test_history_minute_blended(self):
        """One minute-history request can blend a future and an equity."""
        sids = [self.FUTURE_ASSET2, self.AAPL]
        window = self.get_portal().get_history_window(
            sids,
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check that both columns came back full-length
        for sid in sids:
            self.assertEqual(200, len(window[sid]))
    def test_futures_history_daily(self):
        """Daily history for a future ending mid-session."""
        # 3 days ending 11/30 10:00 am Eastern:
        # 11/25, 11/27 (half day), and a partial 11/30.
        future = self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)
        window = self.get_portal().get_history_window(
            [future],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])