Completed
Pull Request — master (#858)
by Eddie
02:03
created

tests.TestHistoryContainerResize   B

Complexity

Total Complexity 36

Size/Duplication

Total Lines 223
Duplicated Lines 0 %
Metric Value
dl 0
loc 223
rs 8.8
wmc 36
1
from os.path import dirname, join, realpath
2
from textwrap import dedent
3
from unittest import TestCase
4
5
from testfixtures import TempDirectory
6
import numpy as np
7
from numpy import array
8
import pandas as pd
9
from pandas import (
10
    read_csv,
11
    Timestamp,
12
    DataFrame, DatetimeIndex)
13
14
from six import iteritems
15
from zipline import TradingAlgorithm
16
17
from zipline.data.data_portal import DataPortal
18
from zipline.data.us_equity_pricing import (
19
    DailyBarWriterFromCSVs,
20
    SQLiteAdjustmentWriter,
21
    SQLiteAdjustmentReader,
22
)
23
from zipline.errors import HistoryInInitialize
24
from zipline.utils.test_utils import (
25
    make_simple_asset_info,
26
    str_to_seconds,
27
    MockDailyBarReader
28
)
29
from zipline.data.minute_writer import MinuteBarWriterFromCSVs
30
from zipline.utils.tradingcalendar import trading_days
31
from zipline.finance.trading import (
32
    TradingEnvironment,
33
    SimulationParameters
34
)
35
36
# Shared base directory for test fixtures: <zipline_repo>/tests/resources.
_TEST_RESOURCE_BASE = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
)

# Minute-bar CSV fixtures consumed by the minute-history tests.
TEST_MINUTE_RESOURCE_PATH = join(_TEST_RESOURCE_BASE, 'history_inputs')

# Daily-bar CSV fixtures shared with the pipeline tests.
TEST_DAILY_RESOURCE_PATH = join(_TEST_RESOURCE_BASE, 'pipeline_inputs')
49
50
51
class HistoryTestCase(TestCase):
52
    @classmethod
    def setUpClass(cls):
        """Build the shared fixtures once for the whole test class.

        Creates the sid constants, a trading environment with asset
        metadata, a temp directory holding fake minute/daily bar data,
        and an adjustments database with splits, mergers and dividends.
        """
        # Sid constants for the fixture assets used throughout the tests.
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        # Every asset nominally trades 2014-03-03 through 2014-08-30.
        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()
        cls.env.write_data(equities_df=asset_info)

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        # If fixture construction fails, clean up the temp dir ourselves:
        # tearDownClass does not run when setUpClass raises.
        try:
            cls.create_fake_minute_data(cls.tempdir)
            cls.create_fake_daily_data(cls.tempdir)

            # Three 2:1 splits (ratio 0.5) each for AAPL and IBM.
            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            # One merger adjustment for C.
            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            # Two cash dividends for DIVIDEND_SID, ex-dates 3/18 and 3/20.
            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            cls.tempdir.cleanup()
            raise
148
149
    @classmethod
    def tearDownClass(cls):
        """Remove the temporary data directory created in setUpClass."""
        cls.tempdir.cleanup()
152
153
    @classmethod
    def create_fake_minute_data(cls, tempdir):
        """Write per-sid minute bars from the CSV fixtures into *tempdir*."""
        # File name per sid; GS reuses the IBM file (its minute data is
        # never read by the tests).
        csv_names = {
            cls.AAPL: 'AAPL_minute.csv.gz',
            cls.MSFT: 'MSFT_minute.csv.gz',
            cls.DELL: 'DELL_minute.csv.gz',
            cls.TSLA: "TSLA_minute.csv.gz",
            cls.BRKA: "BRKA_minute.csv.gz",
            cls.IBM: "IBM_minute.csv.gz",
            cls.GS: "IBM_minute.csv.gz",  # unused
            cls.C: "C_minute.csv.gz",
            cls.DIVIDEND_SID: "DIVIDEND_minute.csv.gz",
        }
        resources = {
            sid: join(TEST_MINUTE_RESOURCE_PATH, name)
            for sid, name in csv_names.items()
        }
        MinuteBarWriterFromCSVs(resources).write(tempdir.path, cls.assets)
170
171
    @classmethod
    def create_fake_daily_data(cls, tempdir):
        """Write daily bars for every fixture sid into a bcolz directory.

        The writer reads each CSV itself, so only the sid -> path mapping
        is needed here.
        """
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            # The *_daily fixtures live in the minute-resource directory.
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        # BUG FIX: the original also loaded every CSV into a `raw_data`
        # dict and appended a 'price' column to each frame, but that dict
        # was never used afterward -- DailyBarWriterFromCSVs reads from
        # `resources` directly.  The dead per-file read_csv work has been
        # removed.
        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)
195
196
    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        """Write an adjustments SQLite db named *filename* into *tempdir*.

        Any of *splits*, *mergers*, *dividends* left as None is replaced
        by an empty frame carrying the dtypes the writer expects.
        """
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)
244
245
    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):
        """Build a DataPortal over the fixture data in the temp directory."""
        if env is None:
            env = self.env

        root = self.tempdir.path
        adjustment_reader = SQLiteAdjustmentReader(
            join(root, adjustments_filename))

        return DataPortal(
            env,
            minutes_equities_path=root,
            daily_equities_path=join(root, daily_equities_filename),
            adjustment_reader=adjustment_reader
        )
264
265
    def test_history_in_initialize(self):
        """Calling history() from initialize() must raise HistoryInInitialize."""
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        sim_start = pd.Timestamp('2007-04-05', tz='UTC')
        sim_end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=sim_start,
            period_end=sim_end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            algo.initialize()
299
300
    def test_minute_basic_functionality(self):
        """A 5-bar minute window at the end of the data matches the fixture."""
        # get a 5-bar minute history from the very end of the available data
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        # BUG FIX: the loop was range(0, 4) and never checked the final
        # reference value; check all five bars.
        for i in range(len(reference)):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])
314
315
    def test_minute_splits(self):
        """Minute bars straddling AAPL's two 0.5-ratio splits are adjusted."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5).

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        # FIX: assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(window.loc[day1_end, 1], 533.086)
        self.assertEqual(window.loc[day2_start, 1], 533.087)
        self.assertEqual(window.loc[day2_end, 1], 533.853)
        self.assertEqual(window.loc[day3_start, 1], 533.854)
340
341
    def test_minute_window_starts_before_trading_start(self):
        """Bars before a sid's first trade are NaN; later bars are real."""
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        # BUG FIX: range(0, 4) skipped the final reference value; check
        # all five trailing bars.
        for i in range(len(reference)):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        # BUG FIX: range(0, 4) skipped the final reference values here too;
        # also assertEquals is a deprecated alias of assertEqual.
        for i in range(5):
            self.assertEqual(window2.iloc[-5 + i].loc[1],
                             reference2[1][i])
            self.assertEqual(window2.iloc[-5 + i].loc[2],
                             reference2[2][i])
386
387
    def test_minute_window_ends_before_trading_start(self):
        """A window lying entirely before the sid's first trade is all NaN."""
        # entire window is before the trading start
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for bar_idx in range(100):
            self.assertTrue(np.isnan(window.iloc[bar_idx].loc[2]))
400
401
    def test_minute_window_ends_after_trading_end(self):
        """Bars after the sid stops trading come back as NaN."""
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        # BUG FIX: the NaN loop started at 46, so bar 45 -- the first of
        # the five NaNs described above -- was never checked.
        for i in range(45, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))
421
422
    def test_minute_window_starts_after_trading_end(self):
        """A window lying entirely after the sid's last trade is all NaN."""
        # entire window is after the trading end
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for bar_idx in range(100):
            self.assertTrue(np.isnan(window.iloc[bar_idx].loc[2]))
435
436
    def test_minute_window_starts_before_1_2_2002(self):
        """Minute data begins 2002-01-02; bars before that are NaN."""
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        # BUG FIX: the non-NaN loop started at 46, leaving bar 45 (the
        # first real bar of the session) unchecked.
        for i in range(45, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))
451
452
    def test_minute_early_close(self):
        """Minute history across an early close plus a split is correct."""
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        # FIX: assertAlmostEquals is a deprecated alias of
        # assertAlmostEqual; iterate with enumerate instead of range(0, 20).
        for i, expected in enumerate(reference):
            self.assertAlmostEqual(window.iloc[i].loc[self.IBM], expected)
475
476
    def test_minute_merger(self):
        """Every field is merger-adjusted for C around 2014-07-16."""
        def check(field, ref):
            # 10 minute bars ending just after the merger effective date.
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            # BUG FIX: range(0, len(ref) - 1) skipped the final bar; also
            # assertEquals is a deprecated alias of assertEqual.
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
508
509
    def test_minute_forward_fill(self):
        """Forward-filling happens only for field='price' with ffill=True."""
        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02
        query_end = pd.Timestamp("2002-01-02 21:00:00", tz='UTC')
        missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]

        # only forward fill if ffill=True AND we are asking for "price"
        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                query_end,
                390,
                "1m",
                field
            )

            for bar_idx in missing_bar_indices:
                observed = no_ffill.iloc[bar_idx].loc[4]
                if field == 'volume':
                    # missing volume bars are reported as zero, not NaN
                    self.assertEqual(observed, 0)
                else:
                    self.assertTrue(np.isnan(observed))

        ffill_window = self.get_portal().get_history_window(
            [4],
            query_end,
            390,
            "1m",
            "price"
        )

        for bar_idx in range(390):
            self.assertFalse(np.isnan(ffill_window.iloc[bar_idx].loc[4]))

        # Each real price appears twice in the filled window: once for the
        # actual bar (even index) and once forward-filled into the missing
        # bar right after it (odd index).
        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[2 * idx + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            query_end,
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            self.assertEqual(really_no_ffill_window.iloc[2 * idx].loc[4], val)
            self.assertTrue(
                np.isnan(really_no_ffill_window.iloc[2 * idx + 1].loc[4]))
579
580
    def test_daily_functionality(self):
        """Daily history whose final bar is aggregated from minute bars.

        Nine real daily bars plus a partial tenth day built from the
        first five minute bars of 2014-03-21: open = first minute's open,
        close = last minute's close, high/low = extremes, volume = sum.
        """
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            # 10 daily bars ending mid-day 2014-03-21.
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)
639
640
    def test_daily_splits_with_no_minute_data(self):
        """Daily history with a split but no minute data for the last day.

        AAPL has daily data through 6/11 but no minute data on 6/11, and
        a 1:7 split effective 6/9.  The final bar should forward-fill for
        'price', be zero for 'volume', and NaN otherwise.
        """
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            # 6 daily bars ending 6/11; the first five values are checked
            # against `reference`, the sixth per the missing-minute rules.
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)
713
714
    def test_daily_window_starts_before_trading_start(self):
        """Daily bars before a sid's first trading day come back as NaN."""
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        observed = [window.iloc[day].loc[self.MSFT] for day in range(5)]
        self.assertTrue(np.isnan(observed[0]))
        self.assertTrue(np.isnan(observed[1]))
        self.assertEqual(observed[2], 38.130)
        self.assertEqual(observed[3], 38.48)
        self.assertTrue(np.isnan(observed[4]))
735
736
    def test_daily_window_ends_before_trading_start(self):
        """A daily window entirely before the sid's first day is all NaN."""
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for day_idx in range(5):
            self.assertTrue(np.isnan(window.iloc[day_idx].loc[self.MSFT]))
751
752
    def test_daily_window_starts_after_trading_end(self):
        """A daily window entirely after the sid's last trade is all NaN."""
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for day_idx in range(8):
            self.assertTrue(np.isnan(window.iloc[day_idx].loc[self.MSFT]))
765
766
    def test_daily_window_ends_after_trading_end(self):
        """A daily window straddling MSFT's delisting shows data up to the
        last trade date and NaNs afterwards."""
        # MSFT stopped trading EOD Friday 8/29/2014.
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # Expect 7 non-NaNs (8/21-8/22, 8/25-8/29) followed by 3 NaNs
        # (9/2-9/4); 9/1/2014 is Labor Day.
        self.assertEqual(len(window), 10)

        msft_col = window[self.MSFT]
        for idx in range(7):
            self.assertFalse(np.isnan(msft_col.iloc[idx]))
        for idx in range(7, 10):
            self.assertTrue(np.isnan(msft_col.iloc[idx]))
785
786
    def test_empty_sid_list(self):
        """History with an empty sid list returns a frame of the requested
        length with zero columns, for every field and frequency."""
        portal = self.get_portal()

        fields = ["open_price", "close_price", "high", "low",
                  "volume", "price"]

        for freq in ["1m", "1d"]:
            for field in fields:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)
                # Every row should be empty (no sids -> no columns).
                for idx in range(6):
                    self.assertEqual(len(window.iloc[idx]), 0)
811
812
    def test_daily_window_starts_before_1_2_2002(self):
        """A daily window reaching back before 1/2/2002 is NaN-padded.

        GS is given an asset lifetime of 1999-05-04 through 2004-08-30,
        so the leading NaNs here come from missing pricing data (nothing
        before 1/2/2002), not from the asset's start date.
        """
        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-05-04'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            pd.Timestamp("2002-01-04 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        # BUG FIX: the original loop was ``range(8, 9)``, which checked
        # only index 8 (1/3).  Per the comment above, both 1/2 (index 7)
        # and 1/3 (index 8) must be non-NaN.
        for i in range(7, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))
845
846
    def test_minute_window_ends_before_1_2_2002(self):
        """Requesting a window that reaches back before 1/2/2002 raises
        ValueError.

        NOTE(review): despite the "minute" in the test name, this
        requests a "1d" frequency window -- confirm whether "1m" was
        intended.
        """
        portal = self.get_portal()
        with self.assertRaises(ValueError):
            portal.get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1d",
                "close_price"
            )
855
856
    def test_bad_history_inputs(self):
        """Invalid field names and invalid frequencies both raise ValueError."""
        portal = self.get_portal()
        end_dt = pd.Timestamp("2014-06-11 15:30", tz='UTC')

        # Unknown field names are rejected.
        for bad_field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    end_dt,
                    6,
                    "1d",
                    bad_field
                )

        # Only "1m" and "1d" frequencies are accepted.
        for bad_freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    end_dt,
                    6,
                    bad_freq,
                    "volume"
                )
880
881
    def test_daily_merger(self):
        """Daily history for C across its merger.

        The first three daily bars are merger-adjusted, and the final
        (partial) 7/17 day is aggregated from the minute bars shown in
        the comments below.
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            # BUG FIX: the original loop ran ``range(0, len(ref) - 1)``
            # and therefore never compared the final (partial-day) value,
            # even though every ref list below includes one (e.g. the
            # 64382 in vol_ref is the sum of the five 7/17 minute
            # volumes).
            for i in range(len(ref)):
                self.assertEqual(window.iloc[i].loc[self.C], ref[i], i)

        # Raw data backing the expectations:
        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)
917
918
    def test_minute_adjustments_as_of_lookback_date(self):
        """Minute history applies only splits that have occurred as of
        the query date.

        AAPL has splits on 2014-03-20 and 2014-03-21.
        """
        window_0320 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        # As of 3/20 the 0.5 split on 3/21 hasn't happened yet, so the
        # same minutes should look 2x larger than they do from 3/21.
        for idx in range(395):
            self.assertEqual(window_0320.iloc[idx].loc[self.AAPL],
                             window_0321.iloc[idx].loc[self.AAPL] * 2)
942
943
    def test_daily_adjustments_as_of_lookback_date(self):
        """Daily history applies only adjustments known as of the query
        date: the same IBM days seen from 4/2 should be 2x their values
        as seen from 7/2."""
        window_0402 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        # Compare the first 22 overlapping days of the two windows.
        for idx in range(22):
            self.assertEqual(window_0402.iloc[idx].loc[self.IBM],
                             window_0702.iloc[idx].loc[self.IBM] * 2)
963
964
    def test_minute_dividends(self):
        """Minute history across DIVIDEND_SID's dividend.

        The DIVIDEND stock has a dividend on 2014-03-18 (ratio 0.98):
        minutes before the ex-date (3/17) are dividend-adjusted, minutes
        on 3/18 are not.
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # Raw minutes (last 5 of 3/17 and first 5 of 3/18):
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        # Expected values, in minute order (3/17 19:56 .. 3/18 13:35).
        open_ref = [116.545, 116.548, 116.551, 116.553, 116.553,
                    116.457, 116.461, 116.461, 116.461, 116.464]
        high_ref = [120.764, 120.537, 126.530, 123.624, 122.050,
                    120.731, 116.520, 117.115, 119.787, 117.221]
        low_ref = [110.196, 115.553, 108.913, 109.870, 106.543,
                   114.148, 106.572, 108.506, 108.861, 112.698]
        close_ref = [115.480, 118.045, 117.722, 116.746, 114.295,
                     117.439, 111.546, 112.810, 114.323, 114.960]
        volume_ref = [2273, 2274, 2274, 2276, 2275,
                      2274, 2275, 2274, 2273, 2272]

        for field, ref in (("open_price", open_ref),
                           ("high", high_ref),
                           ("low", low_ref),
                           ("close_price", close_ref),
                           ("price", close_ref),
                           ("volume", volume_ref)):
            check(field, ref)
1051
1052
    def test_daily_dividends(self):
        """Daily history for DIVIDEND_SID ending on a partial 3/21 day.

        The final entry is built from the 3/21 minute bars shown below;
        earlier daily bars are dividend-adjusted.
        """
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))
            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # Raw data backing the expectations:
        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        # Expected values in day order (3/14 .. 3/21); the final entry is
        # the partial 3/21 day assembled from the minute bars above.
        open_ref = [100.108, 100.111, 100.026, 100.030, 100.032, 114.098]
        high_ref = [100.221, 103.725, 106.455, 102.803, 102.988, 123.773]
        low_ref = [97.370, 93.964, 91.528, 98.510, 92.179, 105.353]
        close_ref = [98.795, 98.844, 98.991, 100.657, 97.584, 115.771]
        volume_ref = [950, 950, 972, 973, 1016, 14333]

        for field, ref in (("open_price", open_ref),
                           ("high", high_ref),
                           ("low", low_ref),
                           ("close_price", close_ref),
                           ("price", close_ref),
                           ("volume", volume_ref)):
            check(field, ref)
1118