Completed
Pull Request — master (#858)
created by Eddie at 03:13

tests.TestHistoryAlgo   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 1046
Duplicated Lines 0 %
Metric Value
dl 0
loc 1046
rs 9.4991
wmc 27

28 Methods

Rating   Name   Duplication   Size   Complexity  
B tests.HistoryTestCase.test_daily_splits_with_no_minute_data() 0 73 6
B tests.HistoryTestCase.test_empty_sid_list() 0 25 4
B tests.HistoryTestCase.test_minute_merger() 0 32 3
A tests.HistoryTestCase.test_daily_window_ends_after_trading_end() 0 19 3
A tests.HistoryTestCase.test_minute_window_starts_before_1_2_2002() 0 15 3
A tests.HistoryTestCase.test_daily_window_starts_after_trading_end() 0 13 2
C tests.HistoryTestCase.test_minute_forward_fill() 0 70 8
B tests.HistoryTestCase.test_window() 0 26 5
A tests.HistoryTestCase.run_query() 0 14 2
A tests.HistoryTestCase.test_daily_functionality() 0 59 3
A tests.HistoryTestCase.test_minute_early_close() 0 23 2
A tests.HistoryTestCase.test_minute_window_ends_before_trading_start() 0 13 2
A tests.HistoryTestCase.test_daily_window_starts_before_trading_start() 0 21 1
A tests.HistoryTestCase.test_minute_window_starts_after_trading_end() 0 13 2
A tests.HistoryTestCase.test_minute_window_ends_after_trading_end() 0 20 3
A tests.HistoryTestCase.test_daily_window_ends_before_trading_start() 0 15 2
A tests.HistoryTestCase.test_daily_adjustments_as_of_lookback_date() 0 20 2
B tests.HistoryTestCase.test_daily_window_starts_before_minute_data() 0 34 3
A tests.HistoryTestCase.test_futures_history_minutes() 0 55 2
B tests.HistoryTestCase.test_daily_merger() 0 36 3
B tests.HistoryTestCase.test_bad_history_inputs() 0 23 5
B tests.HistoryTestCase.test_minute_adjustments_as_of_lookback_date() 0 24 2
A tests.HistoryTestCase.test_minute_window_ends_before_1_2_2002() 0 8 2
A tests.HistoryTestCase.check() 0 13 2
A tests.HistoryTestCase.test_futures_history_daily() 0 16 1
A tests.HistoryTestCase.test_history_minute_blended() 0 12 1
B tests.HistoryTestCase.test_minute_dividends() 0 87 2
A tests.HistoryTestCase.test_daily_dividends() 0 66 2
from os.path import dirname, join, realpath
from textwrap import dedent
from unittest import TestCase
import bcolz
from datetime import timedelta
from nose_parameterized import parameterized
from pandas.tslib import normalize_date
from testfixtures import TempDirectory
import numpy as np
from numpy import array
import pandas as pd
from pandas import (
    read_csv,
    Timestamp,
    DataFrame, DatetimeIndex)

from six import iteritems
from zipline import TradingAlgorithm

from zipline.data.data_portal import DataPortal
from zipline.data.us_equity_pricing import (
    DailyBarWriterFromCSVs,
    SQLiteAdjustmentWriter,
    SQLiteAdjustmentReader,
)
from zipline.errors import HistoryInInitialize
from zipline.utils.test_utils import (
    make_simple_asset_info,
    str_to_seconds,
    MockDailyBarReader
)
from zipline.data.minute_writer import MinuteBarWriterFromCSVs
from zipline.utils.tradingcalendar import trading_days
from zipline.finance.trading import (
    TradingEnvironment,
    SimulationParameters
)

TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)


class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2015-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            cls.create_fake_futures_minute_data(
                cls.tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                cls.tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )
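            # str_to_seconds converts each date string to Unix epoch
            # seconds, the representation the SQLite adjustment writer
            # expects for effective dates.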

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)
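        # For example, FUTURE_ASSET2 starts at 13:31 UTC, which is
        # 13 * 60 + 31 = 811 minutes past midnight, so 811 zeroes are
        # prepended and the bar for minute i of the data lands at array
        # index 811 + i.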

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        path = join(tempdir.path, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9
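        # Timestamp.value is nanoseconds since the Unix epoch, so the
        # attrs record plain epoch seconds.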

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
            join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
        }

        MinuteBarWriterFromCSVs(resources).write(tempdir.path, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = self.env

        temp_path = self.tempdir.path

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        return DataPortal(
            env,
            minutes_equities_path=temp_path,
            daily_equities_path=join(temp_path, daily_equities_filename),
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEquals(window.loc[day1_end, 1], 533.086)
        self.assertEquals(window.loc[day2_start, 1], 533.087)
        self.assertEquals(window.loc[day2_end, 1], 533.853)
        self.assertEquals(window.loc[day3_start, 1], 533.854)
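        # As of the window end (3/21), bars on or before 3/19 carry both
        # 0.5 split adjustments (0.25x the raw values), 3/20 bars carry
        # only the 3/21 split (0.5x), and 3/21 bars are unadjusted.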

    def test_minute_window_starts_before_trading_start(self):
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 5):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):
        # entire window is before the trading start
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):
        # entire window is after the trading end
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEquals(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref)):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]
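
        # Worked example for the partial 3/21 bar (minute prices are
        # stored x1000 in the fixture): open = 185422401 / 1000, close =
        # 185423251 / 1000, high = max(highs) = 185431290 / 1000,
        # low = min(lows) = 185413974 / 1000, and volume =
        # 304 + 300 + 303 + 302 + 302 = 1511.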

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
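        # The 1/7.0 split is effective 6/9, so earlier bars are scaled by
        # 1/7: e.g. the 6/4 open 637.44 / 7 ~= 91.063, the first expected
        # value below. The 6/9 and 6/10 bars are already post-split.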
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

    def test_daily_window_starts_before_trading_start(self):
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEquals(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEquals(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is labor day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.get_portal()

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):

        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("2002-01-04 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.get_portal().get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.get_portal()

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref)):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]
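
        # The 0.5 merger is effective 7/16, so the 7/14 and 7/15 bars are
        # halved (e.g. 139.18 / 2 = 69.59); the partial 7/17 bar aggregates
        # the five minute bars above: open 72.767, high 80.146, low 63.194,
        # close 72.155, volume 12876 + 12875 + 12875 + 12877 + 12879 = 64382.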

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):
        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):
        window_0402 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )
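
        # IBM splits 0.5 on 2014-04-01 and again on 2014-07-01. Both
        # windows reflect the 4/1 split, but only the 7/2 window reflects
        # the 7/1 split, so each overlapping bar differs by exactly 2x.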

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock goes ex-dividend on 2014-03-18
        # (adjustment ratio 0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272
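        # Bars before the ex-date are scaled by the 0.98 ratio: the 19:56
        # open is 118923 -> 118.923 raw, and 118.923 * 0.98 ~= 116.545,
        # the first expected value below.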

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting
        # at self.futures_start_dates[self.FUTURE_ASSET].  Those 10k bars
        # are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])
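
        # Each synthetic bar's stored value is its minute-offset from the
        # start of the data plus the per-field base (0 / 10000 / 20000 /
        # 30000 / 40000 in create_fake_futures_minute_data), so
        # searchsorted over the full minute range recovers the expected
        # value. Prices are written x1000 in the fixture and are assumed
        # to be scaled back down on read.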

    def test_history_minute_blended(self):
        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.get_portal().get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])