Completed
Pull Request — master (#858)
by Eddie, created 01:46

tests.HistoryTestCase.run_query() (rated A)

Complexity
  Conditions: 2

Size
  Total Lines: 14

Duplication
  Lines: 0
  Ratio: 0 %

Metric   Value
cc       2
dl       0
loc      14
rs       9.4286
from os.path import dirname, join, realpath
from textwrap import dedent
from unittest import TestCase
import bcolz
import os
from datetime import timedelta
from nose_parameterized import parameterized
from pandas.tslib import normalize_date
from testfixtures import TempDirectory
import numpy as np
from numpy import array
import pandas as pd
from pandas import (
    read_csv,
    Timestamp,
    DataFrame, DatetimeIndex)

from six import iteritems
from zipline import TradingAlgorithm

from zipline.data.data_portal import DataPortal
from zipline.data.us_equity_pricing import (
    DailyBarWriterFromCSVs,
    SQLiteAdjustmentWriter,
    SQLiteAdjustmentReader,
)
from zipline.errors import HistoryInInitialize
from zipline.utils.test_utils import (
    make_simple_asset_info,
    str_to_seconds,
    MockDailyBarReader
)
from zipline.data.future_pricing import FutureMinuteReader
from zipline.data.us_equity_pricing import BcolzDailyBarReader
from zipline.data.us_equity_minutes import (
    MinuteBarWriterFromCSVs,
    BcolzMinuteBarReader
)
from zipline.utils.tradingcalendar import trading_days
from zipline.finance.trading import (
    TradingEnvironment,
    SimulationParameters
)

TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)


class HistoryTestCase(TestCase):
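    """Tests for DataPortal.get_history_window.

    setUpClass builds the fixture data: nine equities with minute and
    daily bcolz bars, two futures with minute bars, and a SQLite
    adjustments file holding splits, mergers, and dividends.
    """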
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2015-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)

            cls.data_portal = cls.get_portal(
                daily_equities_filename="test_daily_data.bcolz",
                adjustments_filename="adjustments.sqlite"
            )
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })
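        # Synthetic scheme: each field stores its bar's minute index plus a
        # fixed offset (open 0, high 10000, low 20000, close 30000,
        # volume 40000).  The price columns are written scaled by 1000, so
        # that after the readers unscale them every field comes back as
        # offset + minute index, which is what test_futures_history_minutes
        # below asserts.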

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
            join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
                                    equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

    @classmethod
    def get_portal(cls,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = cls.env

        temp_path = cls.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.data_portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEqual(window.loc[day1_end, 1], 533.086)
        self.assertEqual(window.loc[day2_start, 1], 533.087)
        self.assertEqual(window.loc[day2_end, 1], 533.853)
        self.assertEqual(window.loc[day3_start, 1], 533.854)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.data_portal

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 5):
            self.assertEqual(window2.iloc[-5 + i].loc[1],
                             reference2[1][i])
            self.assertEqual(window2.iloc[-5 + i].loc[2],
                             reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
[0 ignored issues. Duplication introduced by: this code seems to be duplicated in the project. Duplicated code is one of the most pungent code smells; if you need to duplicate the same code in three or more places, extract it into a single class or operation. More detailed suggestions are in the "Code" section of the repository.]
        # entire window is before the trading start
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd are a weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
[0 ignored issues. Duplication introduced by: this code seems to be duplicated in the project.]
        # entire window is after the trading end
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.data_portal.get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEqual(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
[0 ignored issues. Duplication introduced by: this code seems to be duplicated in the project.]
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.data_portal.get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
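            # Fetch a 10-bar daily window ending mid-session on 2014-03-21:
            # the first nine values come from the daily bars above, and the
            # last bar is aggregated on the fly from that morning's minutes.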
            window = self.data_portal.get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEqual(window.iloc[i].loc[self.BRKA],
                                 values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]
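        # (1511 = 304 + 300 + 303 + 302 + 302, the five minute volumes above)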

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEqual(window.iloc[i].loc[self.AAPL],
                                 reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,83870500
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

    def test_daily_window_starts_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEqual(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEqual(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is labor day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.data_portal

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):

        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # the first 7 bars should be NaNs, since GS doesn't start trading
        # until 4/5
        # 4/5 and 4/6 should be non-NaN
        # 4/7 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.data_portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.data_portal

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
[0 ignored issues. Duplication introduced by: this code seems to be duplicated in the project.]
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):
[0 ignored issues. Duplication introduced by: this code seems to be duplicated in the project.]
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
[0 ignored issues. Duplication introduced by: this code seems to be duplicated in the project.]
        window_0402 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
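        # (e.g. the 2014-03-17 19:56 raw open of 118.923 * 0.98 rounds to
        # 116.545, the first open_ref value below)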
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00
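        # (14333 = 2866 + 2866 + 2867 + 2867 + 2867, the five minute volumes
        # above)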

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dates[self.FUTURE_ASSET].  Those 10k bars are
        # 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])

    def test_history_minute_blended(self):
        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.data_portal.get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])