Completed
Pull Request — master (#858)
by Eddie, created 01:55

tests.HistoryTestCase.test_daily_dividends()   rated A

Complexity
    Conditions     2

Size
    Total Lines    66

Duplication
    Lines          0
    Ratio          0 %

Metric    Value
cc        2
dl        0
loc       66
rs        9.3191

How to fix: Long Method

Long Method

Small methods make your code easier to understand, particularly when combined with a good name. Moreover, when a method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment as a starting point for the new method's name.

The most commonly applied refactoring for a long method is exactly that kind of extraction: pull part of the long method out into a small, well-named method of its own.
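A minimal sketch of such an extraction (the functions and values below are invented for illustration; they are not taken from this pull request):

# Before: a long method whose commented block hides a separate concept.
def summarize_before(closes):
    # compute the average closing price
    total = sum(closes)
    average = total / len(closes)
    return "average close: %.2f" % average

# After: the commented block becomes its own method, and the comment
# becomes the starting point for its name.
def average_close(closes):
    return sum(closes) / len(closes)

def summarize_after(closes):
    return "average close: %.2f" % average_close(closes)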

from os.path import dirname, join, realpath
from textwrap import dedent
from unittest import TestCase
import bcolz
import os
from datetime import timedelta
from nose_parameterized import parameterized
from pandas.tslib import normalize_date
from testfixtures import TempDirectory
import numpy as np
from numpy import array
import pandas as pd
from pandas import (
    read_csv,
    Timestamp,
    DataFrame, DatetimeIndex)

from six import iteritems
from zipline import TradingAlgorithm

from zipline.data.data_portal import DataPortal
from zipline.data.us_equity_pricing import (
    DailyBarWriterFromCSVs,
    SQLiteAdjustmentWriter,
    SQLiteAdjustmentReader,
)
from zipline.errors import HistoryInInitialize
from zipline.utils.test_utils import (
    make_simple_asset_info,
    str_to_seconds,
    MockDailyBarReader
)
from zipline.data.future_pricing import FutureMinuteReader
from zipline.data.us_equity_pricing import BcolzDailyBarReader
from zipline.data.us_equity_minutes import (
    MinuteBarWriterFromCSVs,
    BcolzMinuteBarReader
)
from zipline.utils.tradingcalendar import trading_days
from zipline.finance.trading import (
    TradingEnvironment,
    SimulationParameters
)

TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)


class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
            join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
                                    equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = self.env

        temp_path = self.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEquals(window.loc[day1_end, 1], 533.086)
        self.assertEquals(window.loc[day2_start, 1], 533.087)
        self.assertEquals(window.loc[day2_end, 1], 533.853)
        self.assertEquals(window.loc[day3_start, 1], 533.854)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 4):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
Duplication introduced by this code: it seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting it into a single class or operation. You can also find more detailed suggestions in the "Code" section of your repository. (The same issue is flagged on several methods below; a sketch of one possible extraction follows.)
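For instance, the get_history_window-then-assert pattern that recurs throughout these tests could be pulled into one shared helper. A rough sketch only; the helper name assert_window_values is invented for illustration and is not part of this pull request:

    def assert_window_values(self, sid, end_dt, bar_count, freq, field,
                             expected):
        # Hypothetical shared helper: fetch a single history window and
        # compare each bar against its reference value.
        window = self.get_portal().get_history_window(
            [sid], end_dt, bar_count, freq, field)
        self.assertEqual(len(window), bar_count)
        for i, value in enumerate(expected):
            self.assertEqual(window.iloc[i].loc[sid], value)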
        # entire window is before the trading start
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd are a weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(46, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
Duplication introduced by this code; see the duplication note above.
        # entire window is after the trading end
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(46, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEquals(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
Duplication introduced by this code; see the duplication note above.
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref) - 1):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

    def test_daily_window_starts_before_trading_start(self):
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEquals(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEquals(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is labor day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.get_portal()

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):

        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(8, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.get_portal().get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.get_portal()

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
Duplication introduced by this code; see the duplication note above.
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref),)

            for i in range(0, len(ref) - 1):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):
Duplication introduced by this code; see the duplication note above.
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
Duplication introduced by this code; see the duplication note above.
        window_0402 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dt.  Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])

    def test_history_minute_blended(self):
        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.get_portal().get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])
1305