Completed — Pull Request #858 (master), created by Eddie at 02:39

tests.HistoryTestCase.create_fake_daily_data()   Rating: B

Complexity:  Conditions 3
Size:        Total Lines 25
Duplication: Lines 0, Ratio 0 %

Metric  Value
cc      3
dl      0
loc     25
rs      8.8571
from os.path import dirname, join, realpath
from textwrap import dedent
from unittest import TestCase
import bcolz
import os
from datetime import timedelta
from nose_parameterized import parameterized
from pandas.tslib import normalize_date
from testfixtures import TempDirectory
import numpy as np
from numpy import array
import pandas as pd
from pandas import (
    read_csv,
    Timestamp,
    DataFrame, DatetimeIndex)

from six import iteritems
from zipline import TradingAlgorithm

from zipline.data.data_portal import DataPortal
from zipline.data.us_equity_pricing import (
    DailyBarWriterFromCSVs,
    SQLiteAdjustmentWriter,
    SQLiteAdjustmentReader,
)
from zipline.errors import HistoryInInitialize
from zipline.utils.test_utils import (
    make_simple_asset_info,
    str_to_seconds,
    MockDailyBarReader
)
from zipline.data.future_pricing import FutureMinuteReader
from zipline.data.us_equity_pricing import BcolzDailyBarReader
from zipline.data.us_equity_minutes import (
    MinuteBarWriterFromCSVs,
    BcolzMinuteBarReader
)
from zipline.utils.tradingcalendar import trading_days
from zipline.finance.trading import (
    TradingEnvironment,
    SimulationParameters
)

TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)


class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.FUTURE_ASSET3 = 12
        cls.FOO = 13
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID, cls.FOO]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID', 'FOO']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
93
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET3: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE3',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC'),
                cls.FUTURE_ASSET3: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            # build data for FUTURE_ASSET3 from 2014-03-19 13:31 to
            # 2014-03-21 20:00.
            # Pause trading between 2014-03-20 14:00 and 2014-03-20 15:00
            gap_start = pd.Timestamp('2014-03-20 14:00', tz='UTC')
            gap_end = pd.Timestamp('2014-03-20 15:00', tz='UTC')
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET3),
                cls.futures_start_dates[cls.FUTURE_ASSET3],
                cls.futures_start_dates[cls.FUTURE_ASSET3] +
                timedelta(minutes=3270),
                gap_start_dt=gap_start,
                gap_end_dt=gap_end,
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2002-03-21"),
                 'ratio': 0.5,
                 'sid': cls.FOO},
            ],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)

            cls.data_portal = cls.get_portal(
                daily_equities_filename="test_daily_data.bcolz",
                adjustments_filename="adjustments.sqlite"
            )
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt,
                                        gap_start_dt=None, gap_end_dt=None):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        if gap_start_dt and gap_end_dt:
            minutes = pd.date_range(normalize_date(start_dt), end_dt, freq='T')
            gap_start_ix = minutes.get_loc(gap_start_dt)
            gap_end_ix = minutes.get_loc(gap_end_dt)
            future_df.iloc[gap_start_ix:gap_end_ix, :] = 0

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9

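    # Reviewer sketch (not part of the PR; the helper name is hypothetical):
    # the zero-buffer above pads from midnight up to start_dt, so that row N
    # of the bcolz table always corresponds to minute N of the normalized
    # day.
    @staticmethod
    def _example_zero_buffer_length(start_dt):
        # e.g. for start_dt = 2014-03-19 13:31 UTC this is
        # 13 * 60 + 31 = 811 padding minutes.
        return int((start_dt -
                    normalize_date(start_dt)).total_seconds() / 60)
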
    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
            join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
            cls.FOO: join(TEST_MINUTE_RESOURCE_PATH,
                          "FOO_minute.csv.gz"),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
                                    equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz'),
            cls.FOO: join(TEST_MINUTE_RESOURCE_PATH, 'FOO_daily.csv.gz'),
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

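    # Reviewer note (not part of the PR): the typed-empty-array "hackery"
    # above is needed because building a DataFrame from plain empty lists
    # would not give the columns their intended dtypes; seeding each column
    # with array([], dtype=...) pins the datetime/float/int dtypes that the
    # SQLiteAdjustmentWriter expects.
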
    @classmethod
    def get_portal(cls,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = cls.env

        temp_path = cls.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.data_portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEqual(window.loc[day1_end, 1], 533.086)
        self.assertEqual(window.loc[day2_start, 1], 533.087)
        self.assertEqual(window.loc[day2_end, 1], 533.853)
        self.assertEqual(window.loc[day3_start, 1], 533.854)

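    # Reviewer sketch (not part of the PR; the helper name and the raw price
    # are hypothetical): the expected values above assume standard backward
    # split adjustment -- every bar before a split's ex-date is scaled by
    # that split's ratio, so the two 0.5 splits compound to 0.25 for bars
    # before 2014-03-20.
    @staticmethod
    def _example_apply_split_ratios(price, ratios_after_bar):
        # e.g. _example_apply_split_ratios(2132.344, [0.5, 0.5]) == 533.086
        for ratio in ratios_after_bar:
            price *= ratio
        return price
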
    def test_ffill_minute_equity_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading NaN if there is
        data available before the start of the window.
        """

        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-21 13:41:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # The previous value is on 2014-03-20, and there is a split between
        # the two dates.  The spot price of the latest value is 1066.92, so
        # the expected result is 533.46 after the 2:1 split is applied.
        expected = np.append(np.full(19, 533.460),
                             np.array(529.601))

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_ffill_minute_future_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading NaN if there is
        data available before the start of the window.
        """

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET3],
            pd.Timestamp("2014-03-20 15:00:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # 31468 is the value at 2014-03-20 13:59, and should be the forward
        # filled value until 2014-03-20 15:00
        expected = np.append(np.full(19, 31468),
                             np.array(31529))

        np.testing.assert_allclose(window.loc[:, self.FUTURE_ASSET3],
                                   expected)

    def test_ffill_daily_equity_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading NaN if there is
        data available before the start of the window.
        """
        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-21 00:00:00+00:00", tz='UTC'),
            2,
            "1d",
            "price"
        )

        # The previous value is on 2014-03-20, and there is a split between
        # the two dates.  The spot price of the latest value is 106.692, so
        # the expected result is 53.346 after the 2:1 split is applied.
        expected = np.array([
            53.346,
            52.95,
        ])

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.data_portal

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 4):
            self.assertEqual(window2.iloc[-5 + i].loc[1],
                             reference2[1][i])
            self.assertEqual(window2.iloc[-5 + i].loc[2],
                             reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
        # entire window is before the trading start
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs, as MSFT stopped trading at the
        # end of the day on 2014-03-21 (and the 22nd and 23rd are a weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(46, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
        # entire window is after the trading end
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.data_portal.get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(46, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # The market closed early on 7/3, and that's reflected in our fake
        # IBM minute data.  Also, IBM had a split that takes effect right
        # after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEqual(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref) - 1):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.data_portal.get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

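    # Reviewer sketch (not part of the PR; the helper name is hypothetical):
    # the ffill expectations above are plain pandas forward-filling.  A
    # minimal equivalent, assuming `prices` is a Series with NaNs at the
    # missing bars:
    @staticmethod
    def _example_ffill(prices):
        # NaN bars take the most recent preceding value; leading NaNs stay
        # NaN (the portal instead fills those from pre-window data, as the
        # *_starts_with_nan tests above exercise).
        return prices.ffill()
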
    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.data_portal.get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEqual(window.iloc[i].loc[self.BRKA],
                                 values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

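    # Reviewer sketch (not part of the PR; the helper name is hypothetical):
    # the "last value" expectations above are the standard aggregation of a
    # partial day's minute bars into one daily bar.
    @staticmethod
    def _example_aggregate_minutes_to_daily(minute_bars):
        # `minute_bars`: DataFrame with open/high/low/close/volume columns,
        # one row per minute, in time order.
        return {
            'open': minute_bars['open'].iloc[0],     # first minute's open
            'high': minute_bars['high'].max(),       # highest high
            'low': minute_bars['low'].min(),         # lowest low
            'close': minute_bars['close'].iloc[-1],  # last minute's close
            'volume': minute_bars['volume'].sum(),   # total volume
        }
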
    def test_daily_splits_with_no_minute_data(self):
        # Scenario: we have daily data for AAPL through 6/11, but no minute
        # data for AAPL on 6/11.  There's also a split for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEqual(window.iloc[i].loc[self.AAPL],
                                 reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

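    # Reviewer note (not part of the PR): the expected values above are the
    # raw daily bars scaled by the 1/7 split, e.g. the 2014-06-04 close of
    # 644.819992 * (1 / 7.0) ~= 92.117, the first close_data entry.
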
    def test_daily_window_starts_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEqual(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEqual(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is Labor Day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.data_portal

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):

        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(8, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.data_portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.data_portal

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref) - 1):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20 should return values 2x larger than history
            # on 3/21, since the 3/21 0.5 split hasn't happened yet as of
            # 3/20
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
        window_0402 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

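    # Reviewer sketch (not part of the PR; the helper name is hypothetical):
    # the 0.98 cited above is a dividend adjustment ratio, conventionally
    # 1 - amount / previous close; bars before the ex-date are scaled by it.
    # With the raw minute prices stored x1000, the first open above is
    # 118923 / 1000 * 0.98 ~= 116.545.
    @staticmethod
    def _example_dividend_ratio(amount, prev_close):
        return 1.0 - amount / prev_close
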
    def test_daily_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data for self.FUTURE_ASSET is 10,000 bars starting at
        # self.futures_start_dates[self.FUTURE_ASSET].  Those 10k bars are
        # 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])

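    # Reviewer note (not part of the PR): the (field, offset) pairs in the
    # expand() decorator above mirror the per-field bases used in
    # create_fake_futures_minute_data, so the expected value for any minute
    # is simply offset + minutes-since-start, which the searchsorted lookup
    # in the loop above recovers.
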
    def test_history_minute_blended(self):
        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.data_portal.get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])