Completed
Pull Request — master (#858)
by Eddie
01:35
created

tests.HistoryTestCase.create_fake_daily_data()   B

Complexity

Conditions 3

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 3
dl 0
loc 24
rs 8.9714
1
from os.path import dirname, join, realpath
2
from textwrap import dedent
3
from unittest import TestCase
4
import bcolz
5
import os
6
from datetime import timedelta
7
from nose_parameterized import parameterized
8
from pandas.tslib import normalize_date
9
from testfixtures import TempDirectory
10
import numpy as np
11
from numpy import array
12
import pandas as pd
13
from pandas import (
14
    read_csv,
15
    Timestamp,
16
    DataFrame, DatetimeIndex)
17
18
from six import iteritems
19
from zipline import TradingAlgorithm
20
21
from zipline.data.data_portal import DataPortal
22
from zipline.data.us_equity_pricing import (
23
    DailyBarWriterFromCSVs,
24
    SQLiteAdjustmentWriter,
25
    SQLiteAdjustmentReader,
26
)
27
from zipline.errors import HistoryInInitialize
28
from zipline.utils.test_utils import (
29
    make_simple_asset_info,
30
    str_to_seconds,
31
    MockDailyBarReader
32
)
33
from zipline.data.us_equity_minutes import MinuteBarWriterFromCSVs
34
from zipline.utils.tradingcalendar import trading_days
35
from zipline.finance.trading import (
36
    TradingEnvironment,
37
    SimulationParameters
38
)
39
40
# Directory holding the fake minute-bar CSV fixtures used by these tests.
TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

# Directory holding the daily-bar CSV fixtures (shared with pipeline tests).
TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)
53
54
55
class HistoryTestCase(TestCase):
56
    @classmethod
    def setUpClass(cls):
        # One sid per fake asset used throughout the history tests.
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        # Equity sids only; the two futures are registered separately below.
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        # Register the equities and the two test futures with the
        # environment's asset finder.
        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    # NOTE(review): start_date (2015) is after end_date
                    # (2014) -- looks like a typo in one of the years;
                    # confirm the intended range.
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        # Everything below writes fixture files into the tempdir; clean it
        # up if any step fails so failed runs don't leak temp directories.
        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            # 10000 minutes of data for FUTURE_ASSET from its start date.
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            # Three 0.5-ratio splits each for AAPL and IBM.
            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            # Single merger for C, effective 2014-07-16.
            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            # Two cash dividends for DIVIDEND_SID (ex-dates 3/18 and 3/20).
            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            # Bare except on purpose: always clean up the tempdir, then
            # re-raise whatever went wrong.
            cls.tempdir.cleanup()
            raise
198
199
    @classmethod
    def tearDownClass(cls):
        """Remove the temporary fixture directory created in setUpClass."""
        cls.tempdir.cleanup()
202
203
    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        """Write a bcolz minute file for ``asset`` covering start_dt..end_dt."""
        minute_count = int((end_dt - start_dt).total_seconds() / 60)

        # Prepend one zero per minute between midnight and start_dt so the
        # file lines up with the start of the session day.
        pad_len = int(
            (start_dt - normalize_date(start_dt)).total_seconds() / 60)
        padding = [0] * pad_len

        def column(base, scale):
            # Ascending values starting at `base`, scaled by `scale`.
            return np.array(
                padding + list(range(base, base + minute_count))) * scale

        frame = pd.DataFrame({
            "open": column(0, 1000),
            "high": column(10000, 1000),
            "low": column(20000, 1000),
            "close": column(30000, 1000),
            "volume": column(40000, 1),
        })

        rootdir = join(tempdir, "{0}.bcolz".format(asset.sid))
        table = bcolz.ctable.fromdataframe(frame, rootdir=rootdir)

        # Stash the time bounds (epoch seconds) as table metadata.
        table.attrs["start_dt"] = start_dt.value / 1e9
        table.attrs["last_dt"] = end_dt.value / 1e9
231
232
    @classmethod
    def create_fake_minute_data(cls, tempdir):
        """Write fake equity minute bars for every test sid under tempdir."""
        csv_names = {
            cls.AAPL: 'AAPL_minute.csv.gz',
            cls.MSFT: 'MSFT_minute.csv.gz',
            cls.DELL: 'DELL_minute.csv.gz',
            cls.TSLA: 'TSLA_minute.csv.gz',
            cls.BRKA: 'BRKA_minute.csv.gz',
            cls.IBM: 'IBM_minute.csv.gz',
            # GS has no dedicated fixture; reuse IBM's file (data unused).
            cls.GS: 'IBM_minute.csv.gz',
            cls.C: 'C_minute.csv.gz',
            cls.DIVIDEND_SID: 'DIVIDEND_minute.csv.gz',
        }
        resources = {
            sid: join(TEST_MINUTE_RESOURCE_PATH, name)
            for sid, name in csv_names.items()
        }

        minutes_dir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(minutes_dir)

        writer = MinuteBarWriterFromCSVs(
            resources, pd.Timestamp('2002-01-02', tz='UTC'))
        writer.write(minutes_dir, cls.assets)
254
255
    @classmethod
    def create_fake_daily_data(cls, tempdir):
        """Write the fake daily bcolz file consumed by the daily-bar tests.

        ``DailyBarWriterFromCSVs`` reads the fixture CSVs itself, so the
        mapping below is all it needs.
        """
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        # NOTE(review): a previous version also loaded every CSV into a
        # DataFrame and added a 'price' column, but those frames were never
        # used (the writer below re-reads the CSVs itself), so that dead
        # code has been removed.
        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)
279
280
    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        """Write an adjustments sqlite file into ``tempdir``.

        Any of splits/mergers/dividends left as None is replaced with an
        empty frame carrying the dtypes the writer expects.
        """
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        def empty_ratio_frame():
            # Empty splits/mergers frame; dtype hackery keeps the writer
            # happy on a frame with no rows.
            return DataFrame(
                {
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if dividends is None:
            # Same dtype hackery for an empty dividends frame.
            dividends = DataFrame(
                {
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = empty_ratio_frame()

        if mergers is None:
            mergers = empty_ratio_frame()

        writer.write(splits, mergers, dividends)
328
329
    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):
        """Build a DataPortal wired to the fixture data in self.tempdir."""
        if env is None:
            env = self.env

        root = self.tempdir.path

        adjustment_reader = SQLiteAdjustmentReader(
            join(root, adjustments_filename))

        return DataPortal(
            env,
            minutes_equities_path=os.path.join(root, 'equity', 'minutes'),
            minutes_futures_path=os.path.join(root, 'futures', 'minutes'),
            daily_equities_path=join(root, daily_equities_filename),
            adjustment_reader=adjustment_reader
        )
352
353
    def test_history_in_initialize(self):
        """Calling history() from initialize() must raise."""
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        sim_params = SimulationParameters(
            period_start=pd.Timestamp('2007-04-05', tz='UTC'),
            period_end=pd.Timestamp('2007-04-10', tz='UTC'),
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            algo.initialize()
387
388
    def test_minute_basic_functionality(self):
        """Get a 5-bar minute history from the very end of the data."""
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        # BUG FIX: the loop previously ran range(0, 4) and never checked
        # the final reference bar; check all five.
        for i in range(5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])
402
403
    def test_minute_splits(self):
        """Values before each AAPL split date should be split-adjusted."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # There are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.
        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(window.loc[day1_end, 1], 533.086)
        self.assertEqual(window.loc[day2_start, 1], 533.087)
        self.assertEqual(window.loc[day2_end, 1], 533.853)
        self.assertEqual(window.loc[day3_start, 1], 533.854)
428
429
    def test_minute_window_starts_before_trading_start(self):
        """Bars before an asset's first trading day come back as NaN."""
        portal = self.get_portal()

        # Get a 50-bar minute history for MSFT starting 5 minutes into
        # 3/20, its first trading day.
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        # BUG FIX: range(0, 4) skipped the final reference bar; check all 5.
        for i in range(5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # Get history for two securities at the same time, where one starts
        # trading a day later than the other.
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        # BUG FIX: same range(0, 4) off-by-one; check all 5 reference bars.
        # (assertEquals is also a deprecated alias of assertEqual.)
        for i in range(5):
            self.assertEqual(window2.iloc[-5 + i].loc[1],
                             reference2[1][i])
            self.assertEqual(window2.iloc[-5 + i].loc[2],
                             reference2[2][i])
474
475
    def test_minute_window_ends_before_trading_start(self):
        """A window entirely before the trading start is all NaN."""
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for bar in range(100):
            self.assertTrue(np.isnan(window.iloc[bar].loc[2]))
488
489
    def test_minute_window_ends_after_trading_end(self):
        """Bars after an asset's last trading day come back as NaN."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # Should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend).
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        # BUG FIX: range(46, 50) skipped index 45, leaving one of the five
        # NaN bars unchecked.
        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))
509
510
    def test_minute_window_starts_after_trading_end(self):
        """A window entirely after the trading end is all NaN."""
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for bar in range(100):
            self.assertTrue(np.isnan(window.iloc[bar].loc[2]))
523
524
    def test_minute_window_starts_before_1_2_2002(self):
        """Bars before the first day of minute data (2002-01-02) are NaN."""
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        # BUG FIX: range(46, 50) skipped index 45, the first populated bar.
        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))
539
540
    def test_minute_early_close(self):
        """Early close on 7/3 plus an IBM split right after it.

        The market closed early on 7/3, and that's reflected in our fake
        IBM minute data; IBM also has a split effective right after the
        early close.
        """
        # Five minutes into the day after an early close, get 20 1m bars.
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        # assertAlmostEquals is a deprecated alias of assertAlmostEqual.
        for i in range(0, 20):
            self.assertAlmostEqual(window.iloc[i].loc[self.IBM], reference[i])
563
564
    def test_minute_merger(self):
        """C has a merger effective 2014-07-16; check each field's values."""
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # BUG FIX: the loop ran range(0, len(ref) - 1) and never
            # checked the final bar; check every reference value.
            # (assertEquals is also a deprecated alias of assertEqual.)
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
596
597
    def test_minute_forward_fill(self):
        """Forward-filling happens only when ffill=True AND field=="price".

        The fake TSLA data (sid 4) is missing several minute bars right
        after the open on 2002-01-02.
        """
        missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            for bar_idx in missing_bar_indices:
                if field == 'volume':
                    # Missing volume shows up as zero...
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
                else:
                    # ...while every other field shows up as NaN.
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        # With ffill on and field "price", no bar should be NaN.
        for bar_idx in range(390):
            self.assertFalse(np.isnan(ffill_window.iloc[bar_idx].loc[4]))

        # Each price repeats across its own bar and the missing bar that
        # follows it, e.g. 14:31 and 14:32 both read 126.183, 14:33 and
        # 14:34 both read 125.648, and so on.
        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for pair_idx, expected in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * pair_idx].loc[4], expected)
            self.assertEqual(
                ffill_window.iloc[(2 * pair_idx) + 1].loc[4], expected)

        # Make sure that if we pass ffill=False with field="price", we do
        # not forward-fill.
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for pair_idx, expected in enumerate(vals):
            even_bar = 2 * pair_idx
            odd_bar = even_bar + 1
            self.assertEqual(
                really_no_ffill_window.iloc[even_bar].loc[4], expected)
            self.assertTrue(
                np.isnan(really_no_ffill_window.iloc[odd_bar].loc[4]))
667
668
    def test_daily_functionality(self):
        """A 10-bar daily window: 9 real daily bars plus an aggregate of
        the current day's minute bars.

        9 daily bars:
        2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        5 one-minute bars that will be aggregated:
        2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302
        """
        def check_field(field, expected):
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for bar in range(10):
                self.assertEqual(window.iloc[bar].loc[self.BRKA],
                                 expected[bar])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        for field, expected in [("open_price", opens),
                                ("close_price", closes),
                                ("price", closes),
                                ("high", highs),
                                ("low", lows),
                                ("volume", volumes)]:
            check_field(field, expected)
727
728
    def test_daily_splits_with_no_minute_data(self):
        """Daily history for AAPL through 6/11 with no minute data on 6/11
        itself, plus a 1:7 split effective 6/9.

        The missing final day should be forward-filled for "price", zero for
        "volume", and NaN for every other field.
        """
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            """Fetch a 6-day window ending 6/11 and compare to `reference`
            (which covers the 5 days that actually have data)."""
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            # the first five days have daily data; compare against the
            # split-adjusted reference values.
            # (assertEqual, not the deprecated assertEquals alias)
            for i in range(0, 5):
                self.assertEqual(window.iloc[i].loc[self.AAPL],
                                 reference[i])

            if ffill and field == "price":
                # price forward-fills across the data-less final day
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    # missing volume is reported as zero, not NaN
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # raw daily bars (pre-split-adjustment values):
        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)
    def test_daily_window_starts_before_trading_start(self):
        """A daily window extending before an asset's first trading day
        should pad the pre-listing days with NaN."""
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        # (assertEqual, not the deprecated assertEquals alias)
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEqual(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEqual(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))
    def test_daily_window_ends_before_trading_start(self):
        """A daily window that ends entirely before MSFT's first trading
        day (3/3/2014) should contain nothing but NaN."""
        history = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(history), 5)

        # every row predates the asset's listing, so all values are NaN
        for row in range(5):
            self.assertTrue(np.isnan(history.iloc[row].loc[self.MSFT]))
    def test_daily_window_starts_after_trading_end(self):
        """A daily window lying entirely after MSFT's last trading day
        (EOD Friday 8/29/2014) should contain nothing but NaN."""
        portal = self.get_portal()
        frame = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(frame), 8)

        # the asset stopped trading before this window began
        for value in frame.loc[:, self.MSFT]:
            self.assertTrue(np.isnan(value))
    def test_daily_window_ends_after_trading_end(self):
        """A daily window straddling MSFT's last trading day (EOD Friday
        8/29/2014) has real bars up to that day and NaN afterwards."""
        frame = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is labor day)
        self.assertEqual(len(frame), 10)

        column = frame.loc[:, self.MSFT]
        for row in range(10):
            if row < 7:
                self.assertFalse(np.isnan(column.iloc[row]))
            else:
                self.assertTrue(np.isnan(column.iloc[row]))
    def test_empty_sid_list(self):
        """History for an empty sid list yields a frame with the requested
        number of rows and zero columns, for every field and frequency."""
        portal = self.get_portal()

        all_fields = ("open_price",
                      "close_price",
                      "high",
                      "low",
                      "volume",
                      "price")

        for frequency in ("1m", "1d"):
            for field_name in all_fields:
                frame = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    frequency,
                    field_name
                )

                self.assertEqual(len(frame), 6)

                # each row exists but holds no assets
                for row in range(6):
                    self.assertEqual(len(frame.iloc[row]), 0)
    def test_daily_window_starts_before_minute_data(self):
        """GS has daily data starting 1999-04-05 but minute data only from
        2002.

        A 10-day window ending on GS's third trading day should be NaN for
        the seven days before the start date, non-NaN for the two live
        trading days, and NaN on the final day (no minute data for it).
        """
        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # the first seven rows predate GS's 4/5 start date -> NaN;
        # 4/5 and 4/6 should be non-NaN;
        # 4/7 should be NaN (since we don't have minute data for it)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        # was range(8, 9), which skipped index 7 entirely — both live
        # trading days in the window should be asserted non-NaN
        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))
    def test_minute_window_ends_before_1_2_2002(self):
        """Minute history requests ending before 2002-01-02 (earlier than
        any stored minute data) are rejected with a ValueError."""
        portal = self.get_portal()
        with self.assertRaises(ValueError):
            portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )
    def test_bad_history_inputs(self):
        """Unknown field names and unsupported frequencies both raise
        ValueError."""
        portal = self.get_portal()
        end_dt = pd.Timestamp("2014-06-11 15:30", tz='UTC')

        # bad fieldname
        for bad_field in ("foo", "bar", "", "5"):
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    end_dt,
                    6,
                    "1d",
                    bad_field
                )

        # bad frequency
        for bad_freq in ("2m", "30m", "3d", "300d", "", "5"):
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    end_dt,
                    6,
                    bad_freq,
                    "volume"
                )
    def test_daily_merger(self):
        """Daily history across C's merger: pre-merger daily bars come back
        merger-adjusted, and the partial final day is aggregated from the
        7/17 minute bars."""
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # was range(0, len(ref) - 1), which silently skipped the final
            # (partial-day) value even though every ref list encodes it —
            # e.g. vol_ref[-1] == 64382, the sum of the five minute volumes
            # in the comment below
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # underlying bars (dailies through 7/16, then 7/17 minutes):
        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
    def test_minute_adjustments_as_of_lookback_date(self):
        """Minute history pulled before AAPL's 3/21 0.5 split should show
        prices exactly double those of the same minutes pulled after it."""
        # AAPL has splits on 2014-03-20 and 2014-03-21
        pre_split_window = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        post_split_window = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        # history on 3/20, since the 3/21 0.5 split hasn't
        # happened yet, should return values 2x larger than history on
        # 3/21
        for minute_idx in range(395):
            before = pre_split_window.iloc[minute_idx].loc[self.AAPL]
            after = post_split_window.iloc[minute_idx].loc[self.AAPL]
            self.assertEqual(before, after * 2)
    def test_daily_adjustments_as_of_lookback_date(self):
        """Daily history pulled before an IBM 0.5 split should show prices
        exactly double those of the same days pulled after it."""
        pre_split_window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        post_split_window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        # compare only the first 22 rows — presumably the span the two
        # windows share, with the 4/02 window's final (partial-day) row
        # excluded; verify against the underlying data if extending
        for day_idx in range(22):
            before = pre_split_window.iloc[day_idx].loc[self.IBM]
            after = post_split_window.iloc[day_idx].loc[self.IBM]
            self.assertEqual(before, after * 2)
    def test_minute_dividends(self):
        """Minute history straddling DIVIDEND_SID's 2014-03-18 dividend
        (ratio 0.98): bars before the ex-date come back dividend-adjusted,
        bars on the ex-date are unadjusted, and volume is untouched.
        """
        def check(field, ref):
            # 10-minute window ending shortly after the open on the
            # ex-date, so it straddles the dividend
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # float-tolerant comparison for the adjusted values
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # raw minute bars below; prices appear stored x1000 —
        # e.g. 118923 * 0.98 / 1000 == 116.545, the first open_ref value
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        # volumes match the raw bars exactly — dividends don't adjust volume
        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
    def test_daily_dividends(self):
        """Daily history for DIVIDEND_SID ending mid-session on 2014-03-21:
        earlier daily bars come back dividend-adjusted, and the final row is
        aggregated from the 3/21 minute bars (its volume, 14333, is the sum
        of the five minute volumes below).
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # float-tolerant comparison for the adjusted values
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # raw bars (dailies through 3/20, then the 3/21 minutes):
        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        # last value is the 3/21 minute-volume sum, not a daily bar
        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        """Minute history for a future backed by synthetic 24/7 data.

        Judging by the assertion below, each field's value at raw minute
        ``i`` is ``offset + i`` (e.g. high = 10000 + minute index), which
        lets us verify both the returned minute labels and the values.
        """
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dt.  Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right: the window's index should match the
        # last 1000 market minutes (built backwards, then reversed)
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        # the raw (24/7) minute range; a returned minute's position in it
        # is the bar's expected base value
        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])
    def test_history_minute_blended(self):
        """Requesting minute history for a future and an equity together
        returns a full-length column for each asset."""
        blended = self.get_portal().get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check on each asset's column length
        for asset in (self.AAPL, self.FUTURE_ASSET2):
            self.assertEqual(200, len(blended[asset]))
    def test_futures_history_daily(self):
        """Three daily futures bars ending mid-session on 11/30:
        11/25, the 11/27 half day, and the partial 11/30 session."""
        future = self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)

        window = self.get_portal().get_history_window(
            [future],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])
1293