Completed
Pull Request — master (#858)
by Eddie
01:43
created

tests.HistoryTestCase   F

Complexity

Total Complexity 92

Size/Duplication

Total Lines 1240
Duplicated Lines 0 %
Metric Value
dl 0
loc 1240
rs 0.6316
wmc 92

39 Methods

Rating   Name   Duplication   Size   Complexity  
A test_daily_adjustments_as_of_lookback_date() 0 20 2
B test_minute_splits() 0 25 1
B test_daily_window_starts_before_minute_data() 0 34 3
B setUpClass() 0 142 2
A test_futures_history_minutes() 0 55 2
B test_daily_merger() 0 36 3
B test_bad_history_inputs() 0 23 5
A tearDownClass() 0 3 1
B test_daily_splits_with_no_minute_data() 0 73 6
B test_empty_sid_list() 0 25 4
B test_minute_merger() 0 32 3
B test_minute_adjustments_as_of_lookback_date() 0 24 2
A test_daily_window_ends_after_trading_end() 0 19 3
A test_minute_window_starts_before_1_2_2002() 0 15 3
B create_fake_adjustments() 0 48 4
A test_minute_window_ends_before_1_2_2002() 0 8 2
B get_portal() 0 24 2
B create_fake_futures_minute_data() 0 28 1
A test_daily_window_starts_after_trading_end() 0 13 2
C test_minute_forward_fill() 0 70 8
B test_history_in_initialize() 0 34 2
B test_window() 0 26 5
A run_query() 0 14 2
A test_daily_functionality() 0 59 3
B create_fake_daily_data() 0 24 3
A test_minute_early_close() 0 23 2
A test_minute_window_ends_before_trading_start() 0 13 2
A test_futures_history_daily() 0 16 1
A test_daily_window_starts_before_trading_start() 0 21 1
A test_minute_basic_functionality() 0 14 2
A test_history_minute_blended() 0 12 1
A test_minute_window_starts_after_trading_end() 0 13 2
B test_minute_dividends() 0 87 2
A create_fake_minute_data() 0 22 1
A test_minute_window_ends_after_trading_end() 0 20 3
A test_daily_window_ends_before_trading_start() 0 15 2
B test_minute_window_starts_before_trading_start() 0 45 4
A test_daily_dividends() 0 66 2
A check() 0 13 2

How to fix   Complexity   

Complex Class

Complex classes like tests.HistoryTestCase often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from os.path import dirname, join, realpath
2
from textwrap import dedent
3
from unittest import TestCase
4
import bcolz
5
import os
6
from datetime import timedelta
7
from nose_parameterized import parameterized
8
from pandas.tslib import normalize_date
9
from testfixtures import TempDirectory
10
import numpy as np
11
from numpy import array
12
import pandas as pd
13
from pandas import (
14
    read_csv,
15
    Timestamp,
16
    DataFrame, DatetimeIndex)
17
18
from six import iteritems
19
from zipline import TradingAlgorithm
20
21
from zipline.data.data_portal import DataPortal
22
from zipline.data.us_equity_pricing import (
23
    DailyBarWriterFromCSVs,
24
    SQLiteAdjustmentWriter,
25
    SQLiteAdjustmentReader,
26
)
27
from zipline.errors import HistoryInInitialize
28
from zipline.utils.test_utils import (
29
    make_simple_asset_info,
30
    str_to_seconds,
31
    MockDailyBarReader
32
)
33
from zipline.data.us_equity_minutes import (
34
    MinuteBarWriterFromCSVs,
35
    BcolzMinuteBarReader
36
)
37
from zipline.utils.tradingcalendar import trading_days
38
from zipline.finance.trading import (
39
    TradingEnvironment,
40
    SimulationParameters
41
)
42
43
TEST_MINUTE_RESOURCE_PATH = join(
44
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
45
    'tests',
46
    'resources',
47
    'history_inputs',
48
)
49
50
TEST_DAILY_RESOURCE_PATH = join(
51
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
52
    'tests',
53
    'resources',
54
    'pipeline_inputs',
55
)
56
57
58
class HistoryTestCase(TestCase):
59
    @classmethod
    def setUpClass(cls):
        """Build the shared test fixtures once for the whole test case.

        Creates sid constants, a TradingEnvironment with nine equities and
        two futures, and a temp directory populated with fake minute data,
        fake futures minute data, fake daily data, and an adjustments
        (splits/mergers/dividends) sqlite database.  All dates and ratios
        here are load-bearing: the individual tests assert exact values
        derived from them.
        """
        # Sid constants used throughout the tests.
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        # Equity sids only -- the futures are registered separately below.
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    # NOTE(review): end_date precedes start_date here
                    # (2014-12-01 < 2015-11-23) -- looks like a typo in one
                    # of the two years; confirm the intended range.
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            # First minute of data for each future; used both to write the
            # fake bars and by the futures history tests.
            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            # 10000 minutes of data for FUTURE_ASSET starting 2015-11-23.
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            # Three 2:1 splits for AAPL and three for IBM (plus one in 2002
            # for the pre-minute-data test).  effective_date is stored in
            # epoch seconds.
            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            # Single merger for C, exercised by the minute/daily merger tests.
            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            # Two cash dividends for DIVIDEND_SID with ex-dates one trading
            # day apart.
            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            # Bare except is deliberate: clean up the temp dir on any failure
            # (including KeyboardInterrupt) and re-raise unchanged.
            cls.tempdir.cleanup()
            raise
201
202
    @classmethod
    def tearDownClass(cls):
        """Remove the temp directory created in setUpClass."""
        cls.tempdir.cleanup()
205
206
    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        """Write deterministic minute bars for ``asset`` as a bcolz ctable.

        Each OHLCV column is a ramp starting at a distinct base offset, so
        tests can assert exact values.  Minutes between midnight and
        ``start_dt`` are zero-filled so row 0 aligns with the start of the
        session day.
        """
        bar_count = int((end_dt - start_dt).total_seconds() / 60)

        # One zero per minute between normalize_date(start_dt) and start_dt.
        pad_len = int(
            (start_dt - normalize_date(start_dt)).total_seconds() / 60)
        padding = [0] * pad_len

        # (column name, base offset, multiplier); volume is not scaled.
        column_specs = [
            ("open", 0, 1000),
            ("high", 10000, 1000),
            ("low", 20000, 1000),
            ("close", 30000, 1000),
            ("volume", 40000, 1),
        ]
        frame = pd.DataFrame({
            name:
            np.array(padding + list(range(base, base + bar_count))) * mult
            for name, base, mult in column_specs
        })

        rootdir = join(tempdir, "{0}.bcolz".format(asset.sid))
        table = bcolz.ctable.fromdataframe(frame, rootdir=rootdir)

        # Stamp the covered interval (epoch seconds) onto the table.
        table.attrs["start_dt"] = start_dt.value / 1e9
        table.attrs["last_dt"] = end_dt.value / 1e9
234
235
    @classmethod
    def create_fake_minute_data(cls, tempdir):
        """Write the canned per-sid minute CSV fixtures under *tempdir*."""
        # GS deliberately reuses the IBM fixture; its data is never asserted.
        filenames = {
            cls.AAPL: 'AAPL_minute.csv.gz',
            cls.MSFT: 'MSFT_minute.csv.gz',
            cls.DELL: 'DELL_minute.csv.gz',
            cls.TSLA: 'TSLA_minute.csv.gz',
            cls.BRKA: 'BRKA_minute.csv.gz',
            cls.IBM: 'IBM_minute.csv.gz',
            cls.GS: 'IBM_minute.csv.gz',  # unused
            cls.C: 'C_minute.csv.gz',
            cls.DIVIDEND_SID: 'DIVIDEND_minute.csv.gz',
        }
        resources = {
            sid: join(TEST_MINUTE_RESOURCE_PATH, name)
            for sid, name in filenames.items()
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        writer = MinuteBarWriterFromCSVs(resources,
                                         pd.Timestamp('2002-01-02', tz='UTC'))
        writer.write(equities_tempdir, cls.assets)
257
258
    @classmethod
    def create_fake_daily_data(cls, tempdir):
        """Write the canned per-sid daily bars to a bcolz file in *tempdir*.

        ``DailyBarWriterFromCSVs`` reads the CSV files itself, so only the
        sid -> path mapping is needed here.
        """
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        # Dead code removed: the old version also parsed every CSV into a
        # `raw_data` dict and added a 'price' column, but that frame was
        # never used -- the writer below re-reads the CSVs directly.
        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)
282
283
    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        """Write an adjustments sqlite db named *filename* into *tempdir*.

        Any of ``splits``/``mergers``/``dividends`` left as None is replaced
        by an empty frame with the dtypes the writer expects.
        """
        def _empty_ratio_frame():
            # Hackery to make the dtypes correct on an empty frame.
            # Splits and mergers share the same schema.
            return DataFrame(
                {
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if dividends is None:
            # Hackery to make the dtypes correct on an empty frame.
            dividend_columns = ['ex_date',
                                'pay_date',
                                'record_date',
                                'declared_date',
                                'amount',
                                'sid']
            dividends = DataFrame(
                {
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=dividend_columns)

        if splits is None:
            splits = _empty_ratio_frame()

        if mergers is None:
            mergers = _empty_ratio_frame()

        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())
        writer.write(splits, mergers, dividends)
331
332
    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):
        """Return a DataPortal wired to the fixture data in ``self.tempdir``.

        ``env`` defaults to the class-level TradingEnvironment when None.
        """
        root = self.tempdir.path

        minute_reader = BcolzMinuteBarReader(
            os.path.join(root, 'equity', 'minutes'))
        adjustment_reader = SQLiteAdjustmentReader(
            join(root, adjustments_filename))

        return DataPortal(
            env if env is not None else self.env,
            equity_minute_reader=minute_reader,
            minutes_futures_path=os.path.join(root, 'futures', 'minutes'),
            daily_equities_path=join(root, daily_equities_filename),
            adjustment_reader=adjustment_reader
        )
357
358
    def test_history_in_initialize(self):
        """Calling history() from initialize() must raise HistoryInInitialize."""
        script = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        sim_params = SimulationParameters(
            period_start=pd.Timestamp('2007-04-05', tz='UTC'),
            period_end=pd.Timestamp('2007-04-10', tz='UTC'),
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        algo = TradingAlgorithm(
            script=script,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            algo.initialize()
392
393
    def test_minute_basic_functionality(self):
        """A 5-bar minute window at the end of the data matches reference."""
        # get a 5-bar minute history from the very end of the available data
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        # BUG FIX: range(0, 4) only checked 4 of the 5 reference bars,
        # leaving the final bar unverified.
        for i in range(5):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])
407
408
    def test_minute_splits(self):
        """Minute windows spanning AAPL's two 0.5 splits adjust correctly."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5.  Spot-check the bars around each boundary.
        expected = [
            (pd.Timestamp("2014-03-19 20:00", tz='UTC'), 533.086),  # day1 end
            (pd.Timestamp("2014-03-20 13:31", tz='UTC'), 533.087),  # day2 start
            (pd.Timestamp("2014-03-20 20:00", tz='UTC'), 533.853),  # day2 end
            (pd.Timestamp("2014-03-21 13:31", tz='UTC'), 533.854),  # day3 start
        ]
        for minute, value in expected:
            self.assertEquals(window.loc[minute, 1], value)
433
434
    def test_minute_window_starts_before_trading_start(self):
        """Bars before a sid's first trading day come back as NaN."""
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        # BUG FIX: range(0, 4) never verified the fifth reference bar.
        for i in range(5):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        # BUG FIX: range(0, 4) skipped the last reference bar for both sids.
        for i in range(5):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])
479
480
    def test_minute_window_ends_before_trading_start(self):
        """A window lying entirely before the sid starts trading is all NaN."""
        # entire window is before the trading start
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        # every one of the 100 bars for sid 2 should be NaN
        for value in window[2]:
            self.assertTrue(np.isnan(value))
493
494
    def test_minute_window_ends_after_trading_end(self):
        """Bars after a sid's last trading minute come back as NaN."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        # BUG FIX: range(46, 50) skipped index 45, the first NaN bar;
        # the NaN tail is indices 45..49 per the comment above.
        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))
514
515
    def test_minute_window_starts_after_trading_end(self):
        """A window lying entirely after the sid stops trading is all NaN."""
        # entire window is after the trading end
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        # every one of the 100 bars for sid 2 should be NaN
        for value in window[2]:
            self.assertTrue(np.isnan(value))
528
529
    def test_minute_window_starts_before_1_2_2002(self):
        """Bars before the start of minute data (2002-01-02) are NaN."""
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        # BUG FIX: range(46, 50) skipped index 45, the first bar with real
        # data; the non-NaN tail is indices 45..49.
        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))
544
545
    def test_minute_early_close(self):
        """Windows crossing the 7/3 early close (and IBM's split) adjust."""
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for idx, expected in enumerate(reference):
            self.assertAlmostEquals(window.iloc[idx].loc[self.IBM], expected)
568
569
    def test_minute_merger(self):
        """Minute history across the C merger (2014-07-16, ratio 0.5) adjusts."""
        def check(field, ref):
            # 10 bars ending five minutes into the merger's effective day.
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # BUG FIX: the loop was range(0, len(ref) - 1), which never
            # verified the final bar of the window.
            for i in range(len(ref)):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
601
602
    def test_minute_forward_fill(self):
        """Forward-filling happens only for field='price' with ffill=True."""
        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02
        query_dt = pd.Timestamp("2002-01-02 21:00:00", tz='UTC')
        missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]

        # non-price fields never forward fill: missing bars stay NaN
        # (or 0 for volume)
        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                query_dt,
                390,
                "1m",
                field
            )

            for bar_idx in missing_bar_indices:
                observed = no_ffill.iloc[bar_idx].loc[4]
                if field == 'volume':
                    self.assertEqual(observed, 0)
                else:
                    self.assertTrue(np.isnan(observed))

        # "price" with the default ffill=True fills every gap
        ffill_window = self.get_portal().get_history_window(
            [4],
            query_dt,
            390,
            "1m",
            "price"
        )

        for i in range(390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # With the gaps filled, each value repeats for two consecutive
        # minutes, e.g.:
        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183  (filled)
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648  (filled)
        # ...
        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            even, odd = 2 * idx, 2 * idx + 1
            self.assertEqual(ffill_window.iloc[even].loc[4], val)
            self.assertEqual(ffill_window.iloc[odd].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            query_dt,
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            even, odd = 2 * idx, 2 * idx + 1
            self.assertEqual(really_no_ffill_window.iloc[even].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[odd].loc[4]))
672
673
    def test_daily_functionality(self):
        """Daily windows blend stored daily bars with the aggregated partial day."""
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i, expected in enumerate(values):
                self.assertEquals(window.iloc[i].loc[self.BRKA], expected)

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        cases = [
            ("open_price", opens),
            ("close_price", closes),
            ("price", closes),
            ("high", highs),
            ("low", lows),
            ("volume", volumes),
        ]
        for field, values in cases:
            run_query(field, values)
732
733
    def test_daily_splits_with_no_minute_data(self):
        """Daily history across a split, where the final day has no minute
        data: prior bars are split-adjusted; the last bar forward-fills for
        `price` (with ffill), is 0 for volume, and NaN otherwise."""
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            # first five bars come from the daily file (split-adjusted)
            for i in range(0, 5):
                # assertEqual: assertEquals is a deprecated alias.
                self.assertEqual(window.iloc[i].loc[self.AAPL],
                                 reference[i])

            # the sixth bar (6/11) has no minute data behind it
            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            elif field == "volume":
                self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
            else:
                self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)
807
    def test_daily_window_starts_before_trading_start(self):
        """Daily bars from before the asset's first trading day should
        come back NaN; the current (end) day is also NaN because there is
        no minute data for it."""
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEqual(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))
829
    def test_daily_window_ends_before_trading_start(self):
        """A daily window that ends entirely before the asset began
        trading should contain nothing but NaNs."""
        # MSFT started on 3/3/2014, so try to go before that
        bar_count = 5
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            bar_count,
            "1d",
            "high"
        )

        self.assertEqual(len(window), bar_count)

        # every bar predates the asset's start date
        for bar_idx in range(bar_count):
            self.assertTrue(np.isnan(window.iloc[bar_idx].loc[self.MSFT]))
845
    def test_daily_window_starts_after_trading_end(self):
        """A daily window that begins entirely after the asset's last
        trading day should be all NaNs."""
        # MSFT stopped trading EOD Friday 8/29/2014
        bar_count = 8
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            bar_count,
            "1d",
            "high",
        )

        self.assertEqual(len(window), bar_count)

        # no bar overlaps the asset's lifetime
        for bar_idx in range(bar_count):
            self.assertTrue(np.isnan(window.iloc[bar_idx].loc[self.MSFT]))
859
    def test_daily_window_ends_after_trading_end(self):
        """A daily window straddling the asset's last trading day has real
        bars up to that day and NaNs afterwards."""
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is labor day)
        self.assertEqual(len(window), 10)

        for bar_idx in range(10):
            value = window.iloc[bar_idx].loc[self.MSFT]
            if bar_idx < 7:
                self.assertFalse(np.isnan(value))
            else:
                self.assertTrue(np.isnan(value))
879
    def test_empty_sid_list(self):
        """History with an empty sid list returns a frame of the requested
        length whose rows are all empty, for every field and frequency."""
        portal = self.get_portal()

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for freq in freqs:
            for field in fields:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                # right number of rows, but no columns in any of them
                self.assertEqual(len(window), 6)

                for row_idx in range(6):
                    self.assertEqual(len(window.iloc[row_idx]), 0)
905
    def test_daily_window_starts_before_minute_data(self):
        """A daily window reaching back before the asset's start has NaNs
        for the pre-start days, real bars for the first trading days, and
        NaN for the end day (no minute data backs it)."""
        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # NOTE(review): the dates below look stale relative to the 1999
        # query above; the asserted structure is 7 NaNs, 2 real bars, 1 NaN.
        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        # BUG FIX: was range(8, 9), which checked only index 8 and left
        # index 7 — the first of the two bars the comment says must be
        # non-NaN — unverified.
        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))
940
    def test_minute_window_ends_before_1_2_2002(self):
        """Minute history is unavailable before 2002-01-02; a window
        ending before then must raise ValueError."""
        portal = self.get_portal()

        with self.assertRaises(ValueError):
            portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )
950
    def test_bad_history_inputs(self):
        """Unknown field names and unsupported frequencies are both
        rejected with ValueError."""
        portal = self.get_portal()
        end_dt = pd.Timestamp("2014-06-11 15:30", tz='UTC')

        # bad fieldname
        for bad_field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    end_dt,
                    6,
                    "1d",
                    bad_field
                )

        # bad frequency
        for bad_freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    end_dt,
                    6,
                    bad_freq,
                    "volume"
                )
975
    def test_daily_merger(self):
        """Daily history across a merger: prior daily bars are adjusted,
        and the final bar is aggregated from the current day's unadjusted
        minutes."""
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # BUG FIX: previously iterated range(0, len(ref) - 1), which
            # never verified the final (minute-aggregated) reference value.
            # assertEqual: assertEquals is a deprecated alias.
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
1012
    def test_minute_adjustments_as_of_lookback_date(self):
        """Adjustments that postdate the query's end should not be
        applied: history as of 3/20 reports prices 2x those reported by
        the same bars queried after the 3/21 0.5 split."""
        # AAPL has splits on 2014-03-20 and 2014-03-21
        portal = self.get_portal()

        window_0320 = portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = portal.get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        # both windows start on the same minute; compare the overlap
        for bar_idx in range(395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            as_of_0320 = window_0320.iloc[bar_idx].loc[self.AAPL]
            as_of_0321 = window_0321.iloc[bar_idx].loc[self.AAPL]
            self.assertEqual(as_of_0320, as_of_0321 * 2)
1037
    def test_daily_adjustments_as_of_lookback_date(self):
        """Daily history should also honor the as-of date: the 4/02
        window's shared bars are 2x the values the 7/02 window reports
        for the same days (an adjustment lands between the two dates)."""
        portal = self.get_portal()

        window_0402 = portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = portal.get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        # compare the first 22 bars shared by both windows
        for day_idx in range(22):
            pre_adjustment = window_0402.iloc[day_idx].loc[self.IBM]
            post_adjustment = window_0702.iloc[day_idx].loc[self.IBM]
            self.assertEqual(pre_adjustment, post_adjustment * 2)
1058
    def test_minute_dividends(self):
        """Minute history spanning a dividend ex-date: bars from before
        the ex-date are dividend-adjusted (here by the 0.98 ratio noted
        below), bars on/after it are returned as stored."""
        def check(field, ref):
            # 10-minute window straddling the 3/17 close / 3/18 open,
            # i.e. crossing the dividend's ex-date.
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # allclose: adjusted values are floats, avoid exact-compare
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        # volumes are not dividend-adjusted
        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
1146
    def test_daily_dividends(self):
        """Daily history spanning a dividend ex-date: the five stored
        daily bars are dividend-adjusted, while the final bar is built
        from the current day's (post-ex-date, unadjusted) minutes."""
        def check(field, ref):
            # 6-bar daily window ending mid-session on 3/21.
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # allclose: adjusted values are floats, avoid exact-compare
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        # last bar's volume is the sum of the 3/21 minute volumes
        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)
1213
    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        """Minute history for a future: the window's index must match the
        futures calendar's market minutes, and each bar's value must be
        `offset` plus the bar's position in the full 24/7 minute range."""
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dt.  Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        start_dt = self.futures_start_dates[self.FUTURE_ASSET]
        futures_end_dt = start_dt + timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right: walk 1000 market minutes backwards
        # from the end, then reverse into chronological order
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        # the full (calendar-agnostic) minute range the fake data covers
        reference_values = pd.date_range(
            start=start_dt,
            end=futures_end_dt,
            freq="T"
        )

        for bar_pos, minute in enumerate(window.index):
            minute_ordinal = reference_values.searchsorted(minute)
            self.assertEqual(offset + minute_ordinal,
                             window.iloc[bar_pos][self.FUTURE_ASSET])
1269
    def test_history_minute_blended(self):
        """Minute history mixing a future and an equity in one request
        should return a full-length column for each asset."""
        assets = [self.FUTURE_ASSET2, self.AAPL]
        window = self.get_portal().get_history_window(
            assets,
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        for asset in assets:
            self.assertEqual(200, len(window[asset]))
1282
    def test_futures_history_daily(self):
        """Daily history for a future follows the futures calendar,
        including a half day and a partial current day."""
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)
        future = self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)

        window = self.get_portal().get_history_window(
            [future],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])
1298