Completed
Pull Request — master (#858)
by Eddie
10:07 queued 01:13
created

tests.HistoryTestCase.check()   A

Complexity
    Conditions    2

Size
    Total Lines   13

Duplication
    Lines         0
    Ratio         0 %

Metric   Value
cc       2
dl       0
loc      13
rs       9.4286
import os
from datetime import timedelta
from os.path import dirname, join, realpath
from textwrap import dedent
from unittest import TestCase

import bcolz
import numpy as np
import pandas as pd
from nose_parameterized import parameterized
from numpy import array
from pandas import (
    read_csv,
    Timestamp,
    DataFrame,
    DatetimeIndex,
)
from pandas.tslib import normalize_date
from six import iteritems
from testfixtures import TempDirectory

from zipline import TradingAlgorithm
from zipline.data.data_portal import DataPortal
from zipline.data.future_pricing import FutureMinuteReader
from zipline.data.us_equity_minutes import (
    MinuteBarWriterFromCSVs,
    BcolzMinuteBarReader,
)
from zipline.data.us_equity_pricing import (
    BcolzDailyBarReader,
    DailyBarWriterFromCSVs,
    SQLiteAdjustmentReader,
    SQLiteAdjustmentWriter,
)
from zipline.errors import HistoryInInitialize
from zipline.finance.trading import (
    TradingEnvironment,
    SimulationParameters,
)
from zipline.utils.test_utils import (
    make_simple_asset_info,
    str_to_seconds,
    MockDailyBarReader,
)
from zipline.utils.tradingcalendar import trading_days

TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)


class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.FUTURE_ASSET3 = 12
        cls.FOO = 13
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID, cls.FOO]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID', 'FOO']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    # this asset's minute data runs from 2015-11-23
                    # through 2015-11-30
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2015-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET3: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE3',
                    'asset_type': 'future',
                },
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC'),
                cls.FUTURE_ASSET3: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            # build data for FUTURE_ASSET3 from 2014-03-19 13:31 to
            # 2014-03-21 20:00, pausing trading between 2014-03-20 14:00
            # and 2014-03-20 15:00
            gap_start = pd.Timestamp('2014-03-20 14:00', tz='UTC')
            gap_end = pd.Timestamp('2014-03-20 15:00', tz='UTC')
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET3),
                cls.futures_start_dates[cls.FUTURE_ASSET3],
                cls.futures_start_dates[cls.FUTURE_ASSET3] +
                timedelta(minutes=3270),
                gap_start_dt=gap_start,
                gap_end_dt=gap_end,
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2002-03-21"),
                 'ratio': 0.5,
                 'sid': cls.FOO},
            ],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)

            cls.data_portal = cls.get_portal(
                daily_equities_filename="test_daily_data.bcolz",
                adjustments_filename="adjustments.sqlite"
            )
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt,
                                        gap_start_dt=None, gap_end_dt=None):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        if gap_start_dt and gap_end_dt:
            minutes = pd.date_range(normalize_date(start_dt), end_dt, freq='T')
            gap_start_ix = minutes.get_loc(gap_start_dt)
            gap_end_ix = minutes.get_loc(gap_end_dt)
            future_df.iloc[gap_start_ix:gap_end_ix, :] = 0

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9
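
    # A quick sketch of the attrs arithmetic above: pd.Timestamp.value is
    # nanoseconds since the Unix epoch, so dividing by 1e9 stores plain
    # POSIX seconds in the ctable's metadata.  For example:
    #
    #   pd.Timestamp("1970-01-01 00:01", tz='UTC').value        # 60000000000
    #   pd.Timestamp("1970-01-01 00:01", tz='UTC').value / 1e9  # 60.0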

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, 'TSLA_minute.csv.gz'),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, 'BRKA_minute.csv.gz'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_minute.csv.gz'),
            cls.GS:
                join(TEST_MINUTE_RESOURCE_PATH, 'IBM_minute.csv.gz'),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_minute.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_minute.csv.gz'),
            cls.FOO: join(TEST_MINUTE_RESOURCE_PATH,
                          'FOO_minute.csv.gz'),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
                                    equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz'),
            cls.FOO: join(TEST_MINUTE_RESOURCE_PATH, 'FOO_daily.csv.gz'),
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid'],
            )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)
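
    # The "hackery" above in a standalone sketch: building a DataFrame from
    # zero-length typed numpy arrays pins each column's dtype even though
    # the frame has no rows, which keeps the SQLite writer's schema stable.
    #
    #   empty = DataFrame({'ratio': array([], dtype=float),
    #                      'sid': array([], dtype=int)})
    #   empty.dtypes  # ratio -> float64, sid -> int (platform-width)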

    @classmethod
    def get_portal(cls,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = cls.env

        temp_path = cls.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()
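
    # For contrast, a sketch (not exercised by this test) of where history()
    # is allowed: calls made from handle_data, once the simulation clock is
    # running, do not raise HistoryInInitialize.
    #
    #   def handle_data(context, data):
    #       prices = history(10, '1d', 'price')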

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.data_portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEqual(window.loc[day1_end, 1], 533.086)
        self.assertEqual(window.loc[day2_start, 1], 533.087)
        self.assertEqual(window.loc[day2_end, 1], 533.853)
        self.assertEqual(window.loc[day3_start, 1], 533.854)
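
    # Split adjustments compose multiplicatively: viewed from 2014-03-21,
    # a bar from before both 0.5 splits is scaled by 0.5 * 0.5 = 0.25,
    # while a bar from between the two splits is scaled by 0.5 only.
    # With a hypothetical raw 2014-03-19 value of 2132.344:
    #
    #   2132.344 * 0.25  # -> 533.086, the day1_end value asserted above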

    def test_ffill_minute_equity_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading NaN if there is
        data available before the start of the window.
        """
        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-21 13:41:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # The previous value is on 2014-03-20, and there is a split between
        # the two dates; the spot price of the latest value is 1066.92, so
        # the expected result is 533.46 after the 2:1 split is applied.
        expected = np.append(np.full(19, 533.460),
                             np.array(529.601))

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_ffill_minute_equity_window_no_previous(self):
        """
        Test that forward filling handles the case where the window starts
        with a NaN and there are no previous values.
        """
        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-19 13:41:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # There should be no values, since there is no data before 2014-03-20.
        expected = np.full(20, np.nan)

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_ffill_minute_future_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading NaN if there is
        data available before the start of the window.
        """
        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET3],
            pd.Timestamp("2014-03-20 15:00:00+00:00", tz='UTC'),
            20,
            "1m",
            "price"
        )

        # 31468 is the value at 2014-03-20 13:59, and should be the forward
        # filled value until 2014-03-20 15:00
        expected = np.append(np.full(19, 31468),
                             np.array(31529))

        np.testing.assert_allclose(window.loc[:, self.FUTURE_ASSET3],
                                   expected)

    def test_ffill_daily_equity_window_starts_with_nan(self):
        """
        Test that forward filling does not leave a leading NaN if there is
        data available before the start of the window.
        """
        window = self.data_portal.get_history_window(
            [self.FOO],
            pd.Timestamp("2014-03-21 00:00:00+00:00", tz='UTC'),
            2,
            "1d",
            "price"
        )

        # The previous value is on 2014-03-20, and there is a split between
        # the two dates; the spot price of the latest value is 106.692, so
        # the expected result is 53.346 after the 2:1 split is applied.
        expected = np.array([
            53.346,
            52.95,
        ])

        np.testing.assert_allclose(window.loc[:, self.FOO], expected)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.data_portal

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 5):
            self.assertEqual(window2.iloc[-5 + i].loc[1],
                             reference2[1][i])
            self.assertEqual(window2.iloc[-5 + i].loc[2],
                             reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
        # entire window is before the trading start
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.data_portal

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs, as MSFT stopped trading at
        # the end of the day on 2014-03-21 (and the 22nd and 23rd are a
        # weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
        # entire window is after the trading end
        window = self.data_portal.get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.data_portal.get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # The market closed early on 7/3, and that's reflected in our fake
        # IBM minute data.  IBM also had a split that takes effect right
        # after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEqual(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.data_portal.get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.data_portal.get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))
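
    # Roughly what ffill=True does for the "price" field, sketched with
    # plain pandas on a series that has NaNs for the missing minutes:
    #
    #   raw = pd.Series([126.183, np.nan, 125.648, np.nan])
    #   raw.ffill()  # -> [126.183, 126.183, 125.648, 125.648]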

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.data_portal.get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEqual(window.iloc[i].loc[self.BRKA],
                                 values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)
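
    # Sanity-checking the aggregated partial bar above: the 2014-03-21
    # daily bar is built from the five minute bars, so its volume is
    # 304 + 300 + 303 + 302 + 302 = 1511, its open is the 13:31 minute's
    # open and its high is the max of the minute highs (185431.290).
    # The minute CSV values are apparently stored at 1000x the price.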

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEqual(window.iloc[i].loc[self.AAPL],
                                 reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)
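
    # Checking the split math above: the 6/9 split has ratio 1/7, so the
    # pre-split 6/04 open of 637.440 becomes 637.440 * (1 / 7.0) ~= 91.063,
    # the first entry of open_data, while the pre-split 6/05 volume of
    # 75951400 becomes 75951400 * 7 = 531659800 (volumes scale inversely).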

    def test_daily_window_starts_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEqual(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEqual(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.data_portal

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.data_portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2-9/4)
        # (9/1/2014 is Labor Day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.data_portal

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):
        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # the seven trading days before GS's 4/5 start date should be NaNs;
        # 4/5 and 4/6 should be non-NaN;
        # 4/7 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.data_portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.data_portal

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
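
    # Checking the merger math above: the 7/16 merger has ratio 0.5, so the
    # pre-merger 7/14 open of 139.18 becomes 139.18 * 0.5 = 69.59
    # (open_ref[0]), while the partial 7/17 daily bar aggregates the five
    # minute bars: volume 12876 + 12875 + 12875 + 12877 + 12879 = 64382
    # (vol_ref[-1]).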

    def test_minute_adjustments_as_of_lookback_date(self):
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.data_portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't happened
            # yet, should return values 2x larger than history on 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
        window_0402 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.data_portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has a dividend that goes ex on 2014-03-18
        # (adjustment ratio 0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
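
    # Checking the dividend math above: bars before the 3/18 ex-date are
    # scaled by the 0.98 adjustment ratio, e.g. the raw 19:56 open of
    # 118.923 becomes 118.923 * 0.98 ~= 116.545 (open_ref[0]); bars on or
    # after the ex-date pass through unadjusted (116.457 at 13:31).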

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.data_portal.get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
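
    # As in the minute test, bars before an ex-date are scaled by the
    # dividend adjustment ratios (here both the 3/18 and 3/20 dividends
    # apply to the earliest bars), and the partial 2014-03-21 daily bar
    # aggregates the five minute bars, so its volume is
    # 2866 + 2866 + 2867 + 2867 + 2867 = 14333 (volume_ref[-1]).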

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting
        # at self.futures_start_dates[self.FUTURE_ASSET].  Those 10k bars
        # are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])
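
    # Sketch of the searchsorted trick above: the synthetic writer stored
    # offset + minute-ordinal for every minute in a 24/7 range, so mapping
    # a market minute back to its position in that range recovers the
    # expected value (assuming the same start minute as
    # futures_start_dates[FUTURE_ASSET]):
    #
    #   rng = pd.date_range("2015-11-23 20:11", periods=10000,
    #                       freq="T", tz="UTC")
    #   rng.searchsorted(pd.Timestamp("2015-11-23 20:15", tz="UTC"))  # -> 4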

    def test_history_minute_blended(self):
        window = self.data_portal.get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)
        window = self.data_portal.get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])