tests.HistoryTestCase.test_daily_dividends() - Code Metrics - Inspection of "Q 2.0" - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed
Pull Request — master (#858)

by Eddie
created 2015-12-18 12:47 UTC
tests.HistoryTestCase.test_daily_dividends() A

↳ Parent: tests.HistoryTestCase
Complexity

Conditions
Size

Total Lines
Duplication

Lines	0
Ratio	0 %
Metric	Value
cc	2
dl	0
loc	66
rs	9.3191
How to fix Long Method

from os.path import dirname, join, realpath
from textwrap import dedent
from unittest import TestCase
import bcolz
import os
from datetime import timedelta
from nose_parameterized import parameterized
from pandas.tslib import normalize_date
from testfixtures import TempDirectory
import numpy as np
from numpy import array
import pandas as pd
from pandas import (
    read_csv,
    Timestamp,
    DataFrame, DatetimeIndex)

from six import iteritems
from zipline import TradingAlgorithm

from zipline.data.data_portal import DataPortal
from zipline.data.us_equity_pricing import (
    DailyBarWriterFromCSVs,
    SQLiteAdjustmentWriter,
    SQLiteAdjustmentReader,
)
from zipline.errors import HistoryInInitialize
from zipline.utils.test_utils import (
    make_simple_asset_info,
    str_to_seconds,
    MockDailyBarReader
)
from zipline.data.future_pricing import FutureMinuteReader
from zipline.data.us_equity_pricing import BcolzDailyBarReader
from zipline.data.us_equity_minutes import (
    MinuteBarWriterFromCSVs,
    BcolzMinuteBarReader
)
from zipline.utils.tradingcalendar import trading_days
from zipline.finance.trading import (
    TradingEnvironment,
    SimulationParameters
)

TEST_MINUTE_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'history_inputs',
)

TEST_DAILY_RESOURCE_PATH = join(
    dirname(dirname(realpath(__file__))),  # zipline_repo/tests
    'tests',
    'resources',
    'pipeline_inputs',
)


class HistoryTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.AAPL = 1
        cls.MSFT = 2
        cls.DELL = 3
        cls.TSLA = 4
        cls.BRKA = 5
        cls.IBM = 6
        cls.GS = 7
        cls.C = 8
        cls.DIVIDEND_SID = 9
        cls.FUTURE_ASSET = 10
        cls.FUTURE_ASSET2 = 11
        cls.assets = [cls.AAPL, cls.MSFT, cls.DELL, cls.TSLA, cls.BRKA,
                      cls.IBM, cls.GS, cls.C, cls.DIVIDEND_SID]

        asset_info = make_simple_asset_info(
            cls.assets,
            Timestamp('2014-03-03'),
            Timestamp('2014-08-30'),
            ['AAPL', 'MSFT', 'DELL', 'TSLA', 'BRKA', 'IBM', 'GS', 'C',
             'DIVIDEND_SID']
        )
        cls.env = TradingEnvironment()

        cls.env.write_data(
            equities_df=asset_info,
            futures_data={
                cls.FUTURE_ASSET: {
                    "start_date": pd.Timestamp('2015-11-23', tz='UTC'),
                    "end_date": pd.Timestamp('2014-12-01', tz='UTC'),
                    'symbol': 'TEST_FUTURE',
                    'asset_type': 'future',
                },
                cls.FUTURE_ASSET2: {
                    "start_date": pd.Timestamp('2014-03-19', tz='UTC'),
                    "end_date": pd.Timestamp('2014-03-22', tz='UTC'),
                    'symbol': 'TEST_FUTURE2',
                    'asset_type': 'future',
                }
            }
        )

        cls.tempdir = TempDirectory()
        cls.tempdir.create()

        try:
            cls.create_fake_minute_data(cls.tempdir)

            cls.futures_start_dates = {
                cls.FUTURE_ASSET: pd.Timestamp("2015-11-23 20:11", tz='UTC'),
                cls.FUTURE_ASSET2: pd.Timestamp("2014-03-19 13:31", tz='UTC')
            }

            futures_tempdir = os.path.join(cls.tempdir.path,
                                           'futures', 'minutes')
            os.makedirs(futures_tempdir)
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET),
                cls.futures_start_dates[cls.FUTURE_ASSET],
                cls.futures_start_dates[cls.FUTURE_ASSET] +
                timedelta(minutes=10000)
            )

            # build data for FUTURE_ASSET2 from 2014-03-19 13:31 to
            # 2014-03-21 20:00
            cls.create_fake_futures_minute_data(
                futures_tempdir,
                cls.env.asset_finder.retrieve_asset(cls.FUTURE_ASSET2),
                cls.futures_start_dates[cls.FUTURE_ASSET2],
                cls.futures_start_dates[cls.FUTURE_ASSET2] +
                timedelta(minutes=3270)
            )

            cls.create_fake_daily_data(cls.tempdir)

            splits = DataFrame([
                {'effective_date': str_to_seconds("2002-01-03"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-20"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-03-21"),
                 'ratio': 0.5,
                 'sid': cls.AAPL},
                {'effective_date': str_to_seconds("2014-04-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-01"),
                 'ratio': 0.5,
                 'sid': cls.IBM},
                {'effective_date': str_to_seconds("2014-07-07"),
                 'ratio': 0.5,
                 'sid': cls.IBM}],
                columns=['effective_date', 'ratio', 'sid'],
            )

            mergers = DataFrame([
                {'effective_date': str_to_seconds("2014-07-16"),
                 'ratio': 0.5,
                 'sid': cls.C}
            ],
                columns=['effective_date', 'ratio', 'sid'])

            dividends = DataFrame([
                {'ex_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-19", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'amount': 2.0,
                 'sid': cls.DIVIDEND_SID},
                {'ex_date':
                 Timestamp("2014-03-20", tz='UTC').to_datetime64(),
                 'record_date':
                 Timestamp("2014-03-21", tz='UTC').to_datetime64(),
                 'declared_date':
                 Timestamp("2014-03-18", tz='UTC').to_datetime64(),
                 'pay_date':
                 Timestamp("2014-03-23", tz='UTC').to_datetime64(),
                 'amount': 4.0,
                 'sid': cls.DIVIDEND_SID}],
                columns=['ex_date',
                         'record_date',
                         'declared_date',
                         'pay_date',
                         'amount',
                         'sid'])

            cls.create_fake_adjustments(cls.tempdir,
                                        "adjustments.sqlite",
                                        splits=splits,
                                        mergers=mergers,
                                        dividends=dividends)
        except:
            cls.tempdir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    @classmethod
    def create_fake_futures_minute_data(cls, tempdir, asset, start_dt, end_dt):
        num_minutes = int((end_dt - start_dt).total_seconds() / 60)

        # need to prepend one 0 per minute between normalize_date(start_dt)
        # and start_dt
        zeroes_buffer = \
            [0] * int((start_dt -
                       normalize_date(start_dt)).total_seconds() / 60)

        future_df = pd.DataFrame({
            "open": np.array(zeroes_buffer +
                             list(range(0, num_minutes))) * 1000,
            "high": np.array(zeroes_buffer +
                             list(range(10000, 10000 + num_minutes))) * 1000,
            "low": np.array(zeroes_buffer +
                            list(range(20000, 20000 + num_minutes))) * 1000,
            "close": np.array(zeroes_buffer +
                              list(range(30000, 30000 + num_minutes))) * 1000,
            "volume": np.array(zeroes_buffer +
                               list(range(40000, 40000 + num_minutes)))
        })

        path = join(tempdir, "{0}.bcolz".format(asset.sid))
        ctable = bcolz.ctable.fromdataframe(future_df, rootdir=path)

        ctable.attrs["start_dt"] = start_dt.value / 1e9
        ctable.attrs["last_dt"] = end_dt.value / 1e9

    @classmethod
    def create_fake_minute_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_MINUTE_RESOURCE_PATH, 'AAPL_minute.csv.gz'),
            cls.MSFT: join(TEST_MINUTE_RESOURCE_PATH, 'MSFT_minute.csv.gz'),
            cls.DELL: join(TEST_MINUTE_RESOURCE_PATH, 'DELL_minute.csv.gz'),
            cls.TSLA: join(TEST_MINUTE_RESOURCE_PATH, "TSLA_minute.csv.gz"),
            cls.BRKA: join(TEST_MINUTE_RESOURCE_PATH, "BRKA_minute.csv.gz"),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),
            cls.GS:
            join(TEST_MINUTE_RESOURCE_PATH, "IBM_minute.csv.gz"),  # unused
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, "C_minute.csv.gz"),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   "DIVIDEND_minute.csv.gz"),
        }

        equities_tempdir = os.path.join(tempdir.path, 'equity', 'minutes')
        os.makedirs(equities_tempdir)

        MinuteBarWriterFromCSVs(resources,
                                pd.Timestamp('2002-01-02', tz='UTC')).write(
                                    equities_tempdir, cls.assets)

    @classmethod
    def create_fake_daily_data(cls, tempdir):
        resources = {
            cls.AAPL: join(TEST_DAILY_RESOURCE_PATH, 'AAPL.csv'),
            cls.MSFT: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),
            cls.DELL: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.TSLA: join(TEST_DAILY_RESOURCE_PATH, 'MSFT.csv'),  # unused
            cls.BRKA: join(TEST_DAILY_RESOURCE_PATH, 'BRK-A.csv'),
            cls.IBM: join(TEST_MINUTE_RESOURCE_PATH, 'IBM_daily.csv.gz'),
            cls.GS: join(TEST_MINUTE_RESOURCE_PATH, 'GS_daily.csv.gz'),
            cls.C: join(TEST_MINUTE_RESOURCE_PATH, 'C_daily.csv.gz'),
            cls.DIVIDEND_SID: join(TEST_MINUTE_RESOURCE_PATH,
                                   'DIVIDEND_daily.csv.gz')
        }
        raw_data = {
            asset: read_csv(path, parse_dates=['day']).set_index('day')
            for asset, path in iteritems(resources)
        }
        for frame in raw_data.values():
            frame['price'] = frame['close']

        writer = DailyBarWriterFromCSVs(resources)
        data_path = tempdir.getpath('test_daily_data.bcolz')
        writer.write(data_path, trading_days, cls.assets)

    @classmethod
    def create_fake_adjustments(cls, tempdir, filename,
                                splits=None, mergers=None, dividends=None):
        writer = SQLiteAdjustmentWriter(tempdir.getpath(filename),
                                        cls.env.trading_days,
                                        MockDailyBarReader())

        if dividends is None:
            dividends = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'ex_date': array([], dtype='datetime64[ns]'),
                    'pay_date': array([], dtype='datetime64[ns]'),
                    'record_date': array([], dtype='datetime64[ns]'),
                    'declared_date': array([], dtype='datetime64[ns]'),
                    'amount': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'),
                columns=['ex_date',
                         'pay_date',
                         'record_date',
                         'declared_date',
                         'amount',
                         'sid']
                )

        if splits is None:
            splits = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        if mergers is None:
            mergers = DataFrame(
                {
                    # Hackery to make the dtypes correct on an empty frame.
                    'effective_date': array([], dtype=int),
                    'ratio': array([], dtype=float),
                    'sid': array([], dtype=int),
                },
                index=DatetimeIndex([], tz='UTC'))

        writer.write(splits, mergers, dividends)

    def get_portal(self,
                   daily_equities_filename="test_daily_data.bcolz",
                   adjustments_filename="adjustments.sqlite",
                   env=None):

        if env is None:
            env = self.env

        temp_path = self.tempdir.path

        minutes_path = os.path.join(temp_path, 'equity', 'minutes')
        futures_path = os.path.join(temp_path, 'futures', 'minutes')

        adjustment_reader = SQLiteAdjustmentReader(
            join(temp_path, adjustments_filename))

        equity_minute_reader = BcolzMinuteBarReader(minutes_path)

        equity_daily_reader = BcolzDailyBarReader(
            join(temp_path, daily_equities_filename))

        future_minute_reader = FutureMinuteReader(futures_path)

        return DataPortal(
            env,
            equity_minute_reader=equity_minute_reader,
            future_minute_reader=future_minute_reader,
            equity_daily_reader=equity_daily_reader,
            adjustment_reader=adjustment_reader
        )

    def test_history_in_initialize(self):
        algo_text = dedent(
            """\
            from zipline.api import history

            def initialize(context):
                history([24], 10, '1d', 'price')

            def handle_data(context, data):
                pass
            """
        )

        start = pd.Timestamp('2007-04-05', tz='UTC')
        end = pd.Timestamp('2007-04-10', tz='UTC')

        sim_params = SimulationParameters(
            period_start=start,
            period_end=end,
            capital_base=float("1.0e5"),
            data_frequency='minute',
            emission_rate='daily',
            env=self.env,
        )

        test_algo = TradingAlgorithm(
            script=algo_text,
            data_frequency='minute',
            sim_params=sim_params,
            env=self.env,
        )

        with self.assertRaises(HistoryInInitialize):
            test_algo.initialize()

    def test_minute_basic_functionality(self):
        # get a 5-bar minute history from the very end of the available data
        window = self.get_portal().get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:23:00+00:00", tz='UTC'),
            5,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 5)
        reference = [534.469, 534.471, 534.475, 534.477, 534.477]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[1], reference[i])

    def test_minute_splits(self):
        portal = self.get_portal()

        window = portal.get_history_window(
            [1],
            pd.Timestamp("2014-03-21 18:30:00+00:00", tz='UTC'),
            1000,
            "1m",
            "open_price"
        )

        self.assertEqual(len(window), 1000)

        # there are two splits for AAPL (on 2014-03-20 and 2014-03-21),
        # each with ratio 0.5).

        day1_end = pd.Timestamp("2014-03-19 20:00", tz='UTC')
        day2_start = pd.Timestamp("2014-03-20 13:31", tz='UTC')
        day2_end = pd.Timestamp("2014-03-20 20:00", tz='UTC')
        day3_start = pd.Timestamp("2014-03-21 13:31", tz='UTC')

        self.assertEquals(window.loc[day1_end, 1], 533.086)
        self.assertEquals(window.loc[day2_start, 1], 533.087)
        self.assertEquals(window.loc[day2_end, 1], 533.853)
        self.assertEquals(window.loc[day3_start, 1], 533.854)

    def test_minute_window_starts_before_trading_start(self):
        portal = self.get_portal()

        # get a 50-bar minute history for MSFT starting 5 minutes into 3/20,
        # its first trading day
        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        self.assertEqual(len(window), 50)
        reference = [107.081, 109.476, 102.316, 107.861, 106.040]
        for i in range(0, 4):
            self.assertEqual(window.iloc[-5 + i].loc[2], reference[i])

        # get history for two securities at the same time, where one starts
        # trading a day later than the other
        window2 = portal.get_history_window(
            [1, 2],
            pd.Timestamp("2014-03-20 13:35:00", tz='UTC'),
            50,
            "1m",
            "low",
        )

        self.assertEqual(len(window2), 50)
        reference2 = {
            1: [1059.318, 1055.914, 1061.136, 1063.698, 1055.964],
            2: [98.902, 99.841, 90.984, 99.891, 98.027]
        }

        for i in range(0, 45):
            self.assertFalse(np.isnan(window2.iloc[i].loc[1]))

            # there should be 45 NaNs for MSFT until it starts trading
            self.assertTrue(np.isnan(window2.iloc[i].loc[2]))

        for i in range(0, 4):
            self.assertEquals(window2.iloc[-5 + i].loc[1],
                              reference2[1][i])
            self.assertEquals(window2.iloc[-5 + i].loc[2],
                              reference2[2][i])

    def test_minute_window_ends_before_trading_start(self):

        # entire window is before the trading start
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-02-05 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_ends_after_trading_end(self):
        portal = self.get_portal()

        window = portal.get_history_window(
            [2],
            pd.Timestamp("2014-03-24 13:35:00", tz='UTC'),
            50,
            "1m",
            "high",
        )

        # should be 45 non-NaNs then 5 NaNs as MSFT has stopped trading at
        # the end of the day 2014-03-21 (and the 22nd and 23rd is weekend)
        self.assertEqual(len(window), 50)

        for i in range(0, 45):
            self.assertFalse(np.isnan(window.iloc[i].loc[2]))

        for i in range(46, 50):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_after_trading_end(self):

        # entire window is after the trading end
        window = self.get_portal().get_history_window(
            [2],
            pd.Timestamp("2014-04-02 14:35:00", tz='UTC'),
            100,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 100)
        for i in range(0, 100):
            self.assertTrue(np.isnan(window.iloc[i].loc[2]))

    def test_minute_window_starts_before_1_2_2002(self):
        window = self.get_portal().get_history_window(
            [3],
            pd.Timestamp("2002-01-02 14:35:00", tz='UTC'),
            50,
            "1m",
            "close_price"
        )

        self.assertEqual(len(window), 50)
        for i in range(0, 45):
            self.assertTrue(np.isnan(window.iloc[i].loc[3]))

        for i in range(46, 50):
            self.assertFalse(np.isnan(window.iloc[i].loc[3]))

    def test_minute_early_close(self):
        # market was closed early on 7/3, and that's reflected in our
        # fake IBM minute data.  also, IBM had a split that takes effect
        # right after the early close.

        # five minutes into the day after an early close, get 20 1m bars
        window = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-07 13:35:00", tz='UTC'),
            20,
            "1m",
            "high"
        )

        self.assertEqual(len(window), 20)

        reference = [27134.486, 27134.802, 27134.660, 27132.813, 27130.964,
                     27133.767, 27133.268, 27131.510, 27134.946, 27132.400,
                     27134.350, 27130.588, 27132.528, 27130.418, 27131.040,
                     27132.664, 27131.307, 27133.978, 27132.779, 27134.476]

        for i in range(0, 20):
            self.assertAlmostEquals(window.iloc[i].loc[self.IBM], reference[i])

    def test_minute_merger(self):
        def check(field, ref):

            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-16 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            for i in range(0, len(ref) - 1):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i])

        open_ref = [71.99, 71.991, 71.992, 71.996, 71.996,
                    72.000, 72.001, 72.002, 72.004, 72.005]
        high_ref = [77.334, 80.196, 80.387, 72.331, 79.184,
                    75.439, 81.176, 78.564, 80.498, 82.000]
        low_ref = [62.621, 70.427, 65.572, 68.357, 63.623,
                   69.805, 67.245, 64.238, 64.487, 71.864]
        close_ref = [69.977, 75.311, 72.979, 70.344, 71.403,
                     72.622, 74.210, 71.401, 72.492, 73.669]
        vol_ref = [12663, 12662, 12661, 12661, 12660, 12661,
                   12663, 12662, 12663, 12662]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_forward_fill(self):
        # only forward fill if ffill=True AND we are asking for "price"

        # our fake TSLA data (sid 4) is missing a bunch of minute bars
        # right after the open on 2002-01-02

        for field in ["open_price", "high", "low", "volume", "close_price"]:
            no_ffill = self.get_portal().get_history_window(
                [4],
                pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
                390,
                "1m",
                field
            )

            missing_bar_indices = [1, 3, 5, 7, 9, 11, 13]
            if field == 'volume':
                for bar_idx in missing_bar_indices:
                    self.assertEqual(no_ffill.iloc[bar_idx].loc[4], 0)
            else:
                for bar_idx in missing_bar_indices:
                    self.assertTrue(np.isnan(no_ffill.iloc[bar_idx].loc[4]))

        ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price"
        )

        for i in range(0, 390):
            self.assertFalse(np.isnan(ffill_window.iloc[i].loc[4]))

        # 2002-01-02 14:31:00+00:00  126.183
        # 2002-01-02 14:32:00+00:00  126.183
        # 2002-01-02 14:33:00+00:00  125.648
        # 2002-01-02 14:34:00+00:00  125.648
        # 2002-01-02 14:35:00+00:00  126.016
        # 2002-01-02 14:36:00+00:00  126.016
        # 2002-01-02 14:37:00+00:00  127.918
        # 2002-01-02 14:38:00+00:00  127.918
        # 2002-01-02 14:39:00+00:00  126.423
        # 2002-01-02 14:40:00+00:00  126.423
        # 2002-01-02 14:41:00+00:00  129.825
        # 2002-01-02 14:42:00+00:00  129.825
        # 2002-01-02 14:43:00+00:00  125.392
        # 2002-01-02 14:44:00+00:00  125.392

        vals = [126.183, 125.648, 126.016, 127.918, 126.423, 129.825, 125.392]
        for idx, val in enumerate(vals):
            self.assertEqual(ffill_window.iloc[2 * idx].loc[4], val)
            self.assertEqual(ffill_window.iloc[(2 * idx) + 1].loc[4], val)

        # make sure that if we pass ffill=False with field="price", we do
        # not ffill
        really_no_ffill_window = self.get_portal().get_history_window(
            [4],
            pd.Timestamp("2002-01-02 21:00:00", tz='UTC'),
            390,
            "1m",
            "price",
            ffill=False
        )

        for idx, val in enumerate(vals):
            idx1 = 2 * idx
            idx2 = idx1 + 1
            self.assertEqual(really_no_ffill_window.iloc[idx1].loc[4], val)
            self.assertTrue(np.isnan(really_no_ffill_window.iloc[idx2].loc[4]))

    def test_daily_functionality(self):
        # 9 daily bars
        # 2014-03-10,183999.0,186400.0,183601.0,186400.0,400
        # 2014-03-11,186925.0,187490.0,185910.0,187101.0,600
        # 2014-03-12,186498.0,187832.0,186005.0,187750.0,300
        # 2014-03-13,188150.0,188852.0,185254.0,185750.0,700
        # 2014-03-14,185825.0,186507.0,183418.0,183860.0,600
        # 2014-03-17,184350.0,185790.0,184350.0,185050.0,400
        # 2014-03-18,185400.0,185400.0,183860.0,184860.0,200
        # 2014-03-19,184860.0,185489.0,182764.0,183860.0,200
        # 2014-03-20,183999.0,186742.0,183630.0,186540.0,300

        # 5 one-minute bars that will be aggregated
        # 2014-03-21 13:31:00+00:00,185422401,185426332,185413974,185420153,304
        # 2014-03-21 13:32:00+00:00,185422402,185424165,185417717,185420941,300
        # 2014-03-21 13:33:00+00:00,185422403,185430663,185419420,185425041,303
        # 2014-03-21 13:34:00+00:00,185422403,185431290,185417079,185424184,302
        # 2014-03-21 13:35:00+00:00,185422405,185430210,185416293,185423251,302

        def run_query(field, values):
            window = self.get_portal().get_history_window(
                [self.BRKA],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                10,
                "1d",
                field
            )

            self.assertEqual(len(window), 10)

            for i in range(0, 10):
                self.assertEquals(window.iloc[i].loc[self.BRKA],
                                  values[i])

        # last value is the first minute's open
        opens = [183999, 186925, 186498, 188150, 185825, 184350,
                 185400, 184860, 183999, 185422.401]

        # last value is the last minute's close
        closes = [186400, 187101, 187750, 185750, 183860, 185050,
                  184860, 183860, 186540, 185423.251]

        # last value is the highest high value
        highs = [186400, 187490, 187832, 188852, 186507, 185790,
                 185400, 185489, 186742, 185431.290]

        # last value is the lowest low value
        lows = [183601, 185910, 186005, 185254, 183418, 184350, 183860,
                182764, 183630, 185413.974]

        # last value is the sum of all the minute volumes
        volumes = [400, 600, 300, 700, 600, 400, 200, 200, 300, 1511]

        run_query("open_price", opens)
        run_query("close_price", closes)
        run_query("price", closes)
        run_query("high", highs)
        run_query("low", lows)
        run_query("volume", volumes)

    def test_daily_splits_with_no_minute_data(self):
        # scenario is that we have daily data for AAPL through 6/11,
        # but we have no minute data for AAPL on 6/11. there's also a split
        # for AAPL on 6/9.
        splits = DataFrame(
            [
                {
                    'effective_date': str_to_seconds('2014-06-09'),
                    'ratio': (1 / 7.0),
                    'sid': self.AAPL,
                }
            ],
            columns=['effective_date', 'ratio', 'sid'])

        self.create_fake_adjustments(self.tempdir,
                                     "adjustments2.sqlite",
                                     splits=splits)

        portal = self.get_portal(adjustments_filename="adjustments2.sqlite")

        def test_window(field, reference, ffill=True):
            window = portal.get_history_window(
                [self.AAPL],
                pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                6,
                "1d",
                field,
                ffill
            )

            self.assertEqual(len(window), 6)

            for i in range(0, 5):
                self.assertEquals(window.iloc[i].loc[self.AAPL],
                                  reference[i])

            if ffill and field == "price":
                last_val = window.iloc[5].loc[self.AAPL]
                second_to_last_val = window.iloc[4].loc[self.AAPL]

                self.assertEqual(last_val, second_to_last_val)
            else:
                if field == "volume":
                    self.assertEqual(window.iloc[5].loc[self.AAPL], 0)
                else:
                    self.assertTrue(np.isnan(window.iloc[5].loc[self.AAPL]))

        # 2014-06-04,637.4400099999999,647.8899690000001,636.110046,644.819992,p
        # 2014-06-05,646.20005,649.370003,642.610008,647.349983,75951400
        # 2014-06-06,649.900002,651.259979,644.469971,645.570023,87484600
        # 2014-06-09,92.699997,93.879997,91.75,93.699997,75415000
        # 2014-06-10,94.730003,95.050003,93.57,94.25,62777000
        open_data = [91.063, 92.314, 92.843, 92.699, 94.730]
        test_window("open_price", open_data, ffill=False)
        test_window("open_price", open_data)

        high_data = [92.556, 92.767, 93.037, 93.879, 95.050]
        test_window("high", high_data, ffill=False)
        test_window("high", high_data)

        low_data = [90.873, 91.801, 92.067, 91.750, 93.570]
        test_window("low", low_data, ffill=False)
        test_window("low", low_data)

        close_data = [92.117, 92.478, 92.224, 93.699, 94.250]
        test_window("close_price", close_data, ffill=False)
        test_window("close_price", close_data)
        test_window("price", close_data, ffill=False)
        test_window("price", close_data)

        vol_data = [587093500, 531659800, 612392200, 75415000, 62777000]
        test_window("volume", vol_data)
        test_window("volume", vol_data, ffill=False)

    def test_daily_window_starts_before_trading_start(self):
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-03-05 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)

        # should be two empty days, then 3/3 and 3/4, then
        # an empty day because we don't have minute data for 3/5
        self.assertTrue(np.isnan(window.iloc[0].loc[self.MSFT]))
        self.assertTrue(np.isnan(window.iloc[1].loc[self.MSFT]))
        self.assertEquals(window.iloc[2].loc[self.MSFT], 38.130)
        self.assertEquals(window.iloc[3].loc[self.MSFT], 38.48)
        self.assertTrue(np.isnan(window.iloc[4].loc[self.MSFT]))

    def test_daily_window_ends_before_trading_start(self):
        portal = self.get_portal()

        # MSFT started on 3/3/2014, so try to go before that
        window = portal.get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-02-28 13:35:00", tz='UTC'),
            5,
            "1d",
            "high"
        )

        self.assertEqual(len(window), 5)
        for i in range(0, 5):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_starts_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-12 13:35:00", tz='UTC'),
            8,
            "1d",
            "high",
        )

        self.assertEqual(len(window), 8)
        for i in range(0, 8):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_daily_window_ends_after_trading_end(self):
        # MSFT stopped trading EOD Friday 8/29/2014
        window = self.get_portal().get_history_window(
            [self.MSFT],
            pd.Timestamp("2014-09-04 13:35:00", tz='UTC'),
            10,
            "1d",
            "high",
        )

        # should be 7 non-NaNs (8/21-8/22, 8/25-8/29) and 3 NaNs (9/2 - 9/4)
        # (9/1/2014 is labor day)
        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.MSFT]))

        for i in range(7, 10):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.MSFT]))

    def test_empty_sid_list(self):
        portal = self.get_portal()

        fields = ["open_price",
                  "close_price",
                  "high",
                  "low",
                  "volume",
                  "price"]
        freqs = ["1m", "1d"]

        for field in fields:
            for freq in freqs:
                window = portal.get_history_window(
                    [],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    field
                )

                self.assertEqual(len(window), 6)

                for i in range(0, 6):
                    self.assertEqual(len(window.iloc[i]), 0)

    def test_daily_window_starts_before_minute_data(self):

        env = TradingEnvironment()
        asset_info = make_simple_asset_info(
            [self.GS],
            Timestamp('1999-04-05'),
            Timestamp('2004-08-30'),
            ['GS']
        )
        env.write_data(equities_df=asset_info)
        portal = self.get_portal(env=env)

        window = portal.get_history_window(
            [self.GS],
            # 3rd day of daily data for GS, minute data starts in 2002.
            pd.Timestamp("1999-04-07 14:35:00", tz='UTC'),
            10,
            "1d",
            "low"
        )

        # 12/20, 12/21, 12/24, 12/26, 12/27, 12/28, 12/31 should be NaNs
        # 1/2 and 1/3 should be non-NaN
        # 1/4 should be NaN (since we don't have minute data for it)

        self.assertEqual(len(window), 10)

        for i in range(0, 7):
            self.assertTrue(np.isnan(window.iloc[i].loc[self.GS]))

        for i in range(8, 9):
            self.assertFalse(np.isnan(window.iloc[i].loc[self.GS]))

        self.assertTrue(np.isnan(window.iloc[9].loc[self.GS]))

    def test_minute_window_ends_before_1_2_2002(self):
        with self.assertRaises(ValueError):
            self.get_portal().get_history_window(
                [self.GS],
                pd.Timestamp("2001-12-31 14:35:00", tz='UTC'),
                50,
                "1m",
                "close_price"
            )

    def test_bad_history_inputs(self):
        portal = self.get_portal()

        # bad fieldname
        for field in ["foo", "bar", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    "1d",
                    field
                )

        # bad frequency
        for freq in ["2m", "30m", "3d", "300d", "", "5"]:
            with self.assertRaises(ValueError):
                portal.get_history_window(
                    [self.AAPL],
                    pd.Timestamp("2014-06-11 15:30", tz='UTC'),
                    6,
                    freq,
                    "volume"
                )

    def test_daily_merger(self):
        def check(field, ref):

            window = self.get_portal().get_history_window(
                [self.C],
                pd.Timestamp("2014-07-17 13:35", tz='UTC'),
                4,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref),)

            for i in range(0, len(ref) - 1):
                self.assertEquals(window.iloc[i].loc[self.C], ref[i], i)

        # 2014-07-14 00:00:00+00:00,139.18,139.14,139.2,139.17,12351
        # 2014-07-15 00:00:00+00:00,139.2,139.2,139.18,139.19,12354
        # 2014-07-16 00:00:00+00:00,69.58,69.56,69.57,69.565,12352
        # 2014-07-17 13:31:00+00:00,72767,80146,63406,71776,12876
        # 2014-07-17 13:32:00+00:00,72769,76943,68907,72925,12875
        # 2014-07-17 13:33:00+00:00,72771,76127,63194,69660,12875
        # 2014-07-17 13:34:00+00:00,72774,79349,69771,74560,12877
        # 2014-07-17 13:35:00+00:00,72776,75340,68970,72155,12879

        open_ref = [69.59, 69.6, 69.58, 72.767]
        high_ref = [69.57, 69.6, 69.56, 80.146]
        low_ref = [69.6, 69.59, 69.57, 63.194]
        close_ref = [69.585, 69.595, 69.565, 72.155]
        vol_ref = [12351, 12354, 12352, 64382]

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", vol_ref)

    def test_minute_adjustments_as_of_lookback_date(self):

        # AAPL has splits on 2014-03-20 and 2014-03-21
        window_0320 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-20 13:35", tz='UTC'),
            395,
            "1m",
            "open_price"
        )

        window_0321 = self.get_portal().get_history_window(
            [self.AAPL],
            pd.Timestamp("2014-03-21 13:35", tz='UTC'),
            785,
            "1m",
            "open_price"
        )

        for i in range(0, 395):
            # history on 3/20, since the 3/21 0.5 split hasn't
            # happened yet, should return values 2x larger than history on
            # 3/21
            self.assertEqual(window_0320.iloc[i].loc[self.AAPL],
                             window_0321.iloc[i].loc[self.AAPL] * 2)

    def test_daily_adjustments_as_of_lookback_date(self):

        window_0402 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-04-02 13:35", tz='UTC'),
            23,
            "1d",
            "open_price"
        )

        window_0702 = self.get_portal().get_history_window(
            [self.IBM],
            pd.Timestamp("2014-07-02 13:35", tz='UTC'),
            86,
            "1d",
            "open_price"
        )

        for i in range(0, 22):
            self.assertEqual(window_0402.iloc[i].loc[self.IBM],
                             window_0702.iloc[i].loc[self.IBM] * 2)

    def test_minute_dividends(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-18 13:35", tz='UTC'),
                10,
                "1m",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # the DIVIDEND stock has dividends on 2014-03-18 (0.98)
        # 2014-03-17 19:56:00+00:00,118923,123229,112445,117837,2273
        # 2014-03-17 19:57:00+00:00,118927,122997,117911,120454,2274
        # 2014-03-17 19:58:00+00:00,118930,129112,111136,120124,2274
        # 2014-03-17 19:59:00+00:00,118932,126147,112112,119129,2276
        # 2014-03-17 20:00:00+00:00,118932,124541,108717,116628,2275
        # 2014-03-18 13:31:00+00:00,116457,120731,114148,117439,2274
        # 2014-03-18 13:32:00+00:00,116461,116520,106572,111546,2275
        # 2014-03-18 13:33:00+00:00,116461,117115,108506,112810,2274
        # 2014-03-18 13:34:00+00:00,116461,119787,108861,114323,2273
        # 2014-03-18 13:35:00+00:00,116464,117221,112698,114960,2272

        open_ref = [116.545,  # 2014-03-17 19:56:00+00:00
                    116.548,  # 2014-03-17 19:57:00+00:00
                    116.551,  # 2014-03-17 19:58:00+00:00
                    116.553,  # 2014-03-17 19:59:00+00:00
                    116.553,  # 2014-03-17 20:00:00+00:00
                    116.457,  # 2014-03-18 13:31:00+00:00
                    116.461,  # 2014-03-18 13:32:00+00:00
                    116.461,  # 2014-03-18 13:33:00+00:00
                    116.461,  # 2014-03-18 13:34:00+00:00
                    116.464]  # 2014-03-18 13:35:00+00:00

        high_ref = [120.764,  # 2014-03-17 19:56:00+00:00
                    120.537,  # 2014-03-17 19:57:00+00:00
                    126.530,  # 2014-03-17 19:58:00+00:00
                    123.624,  # 2014-03-17 19:59:00+00:00
                    122.050,  # 2014-03-17 20:00:00+00:00
                    120.731,  # 2014-03-18 13:31:00+00:00
                    116.520,  # 2014-03-18 13:32:00+00:00
                    117.115,  # 2014-03-18 13:33:00+00:00
                    119.787,  # 2014-03-18 13:34:00+00:00
                    117.221]  # 2014-03-18 13:35:00+00:00

        low_ref = [110.196,  # 2014-03-17 19:56:00+00:00
                   115.553,  # 2014-03-17 19:57:00+00:00
                   108.913,  # 2014-03-17 19:58:00+00:00
                   109.870,  # 2014-03-17 19:59:00+00:00
                   106.543,  # 2014-03-17 20:00:00+00:00
                   114.148,  # 2014-03-18 13:31:00+00:00
                   106.572,  # 2014-03-18 13:32:00+00:00
                   108.506,  # 2014-03-18 13:33:00+00:00
                   108.861,  # 2014-03-18 13:34:00+00:00
                   112.698]  # 2014-03-18 13:35:00+00:00

        close_ref = [115.480,  # 2014-03-17 19:56:00+00:00
                     118.045,  # 2014-03-17 19:57:00+00:00
                     117.722,  # 2014-03-17 19:58:00+00:00
                     116.746,  # 2014-03-17 19:59:00+00:00
                     114.295,  # 2014-03-17 20:00:00+00:00
                     117.439,  # 2014-03-18 13:31:00+00:00
                     111.546,  # 2014-03-18 13:32:00+00:00
                     112.810,  # 2014-03-18 13:33:00+00:00
                     114.323,  # 2014-03-18 13:34:00+00:00
                     114.960]  # 2014-03-18 13:35:00+00:00

        volume_ref = [2273,  # 2014-03-17 19:56:00+00:00
                      2274,  # 2014-03-17 19:57:00+00:00
                      2274,  # 2014-03-17 19:58:00+00:00
                      2276,  # 2014-03-17 19:59:00+00:00
                      2275,  # 2014-03-17 20:00:00+00:00
                      2274,  # 2014-03-18 13:31:00+00:00
                      2275,  # 2014-03-18 13:32:00+00:00
                      2274,  # 2014-03-18 13:33:00+00:00
                      2273,  # 2014-03-18 13:34:00+00:00
                      2272]  # 2014-03-18 13:35:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    def test_daily_dividends(self):
        def check(field, ref):
            window = self.get_portal().get_history_window(
                [self.DIVIDEND_SID],
                pd.Timestamp("2014-03-21 13:35", tz='UTC'),
                6,
                "1d",
                field
            )

            self.assertEqual(len(window), len(ref))

            np.testing.assert_allclose(window.loc[:, self.DIVIDEND_SID], ref)

        # 2014-03-14 00:00:00+00:00,106408,106527,103498,105012,950
        # 2014-03-17 00:00:00+00:00,106411,110252,99877,105064,950
        # 2014-03-18 00:00:00+00:00,104194,110891,95342,103116,972
        # 2014-03-19 00:00:00+00:00,104198,107086,102615,104851,973
        # 2014-03-20 00:00:00+00:00,100032,102989,92179,97584,1016
        # 2014-03-21 13:31:00+00:00,114098,120818,110333,115575,2866
        # 2014-03-21 13:32:00+00:00,114099,120157,105353,112755,2866
        # 2014-03-21 13:33:00+00:00,114099,122263,108838,115550,2867
        # 2014-03-21 13:34:00+00:00,114101,116620,106654,111637,2867
        # 2014-03-21 13:35:00+00:00,114104,123773,107769,115771,2867

        open_ref = [100.108,  # 2014-03-14 00:00:00+00:00
                    100.111,  # 2014-03-17 00:00:00+00:00
                    100.026,  # 2014-03-18 00:00:00+00:00
                    100.030,  # 2014-03-19 00:00:00+00:00
                    100.032,  # 2014-03-20 00:00:00+00:00
                    114.098]  # 2014-03-21 00:00:00+00:00

        high_ref = [100.221,  # 2014-03-14 00:00:00+00:00
                    103.725,  # 2014-03-17 00:00:00+00:00
                    106.455,  # 2014-03-18 00:00:00+00:00
                    102.803,  # 2014-03-19 00:00:00+00:00
                    102.988,  # 2014-03-20 00:00:00+00:00
                    123.773]  # 2014-03-21 00:00:00+00:00

        low_ref = [97.370,  # 2014-03-14 00:00:00+00:00
                   93.964,  # 2014-03-17 00:00:00+00:00
                   91.528,  # 2014-03-18 00:00:00+00:00
                   98.510,  # 2014-03-19 00:00:00+00:00
                   92.179,  # 2014-03-20 00:00:00+00:00
                   105.353]  # 2014-03-21 00:00:00+00:00

        close_ref = [98.795,  # 2014-03-14 00:00:00+00:00
                     98.844,  # 2014-03-17 00:00:00+00:00
                     98.991,  # 2014-03-18 00:00:00+00:00
                     100.657,  # 2014-03-19 00:00:00+00:00
                     97.584,  # 2014-03-20 00:00:00+00:00
                     115.771]  # 2014-03-21 00:00:00+00:00

        volume_ref = [950,  # 2014-03-14 00:00:00+00:00
                      950,  # 2014-03-17 00:00:00+00:00
                      972,  # 2014-03-18 00:00:00+00:00
                      973,  # 2014-03-19 00:00:00+00:00
                      1016,  # 2014-03-20 00:00:00+00:00
                      14333]  # 2014-03-21 00:00:00+00:00

        check("open_price", open_ref)
        check("high", high_ref)
        check("low", low_ref)
        check("close_price", close_ref)
        check("price", close_ref)
        check("volume", volume_ref)

    @parameterized.expand([('open', 0),
                           ('high', 10000),
                           ('low', 20000),
                           ('close', 30000),
                           ('price', 30000),
                           ('volume', 40000)])
    def test_futures_history_minutes(self, field, offset):
        # our history data, for self.FUTURE_ASSET, is 10,000 bars starting at
        # self.futures_start_dt.  Those 10k bars are 24/7.

        # = 2015-11-30 18:50 UTC, 13:50 Eastern = during market hours
        futures_end_dt = \
            self.futures_start_dates[self.FUTURE_ASSET] + \
            timedelta(minutes=9999)

        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET],
            futures_end_dt,
            1000,
            "1m",
            field
        )

        # check the minutes are right
        reference_minutes = self.env.market_minute_window(
            futures_end_dt, 1000, step=-1
        )[::-1]

        np.testing.assert_array_equal(window.index, reference_minutes)

        # check the values

        # 2015-11-24 18:41
        # ...
        # 2015-11-24 21:00
        # 2015-11-25 14:31
        # ...
        # 2015-11-25 21:00
        # 2015-11-27 14:31
        # ...
        # 2015-11-27 18:00  # early close
        # 2015-11-30 14:31
        # ...
        # 2015-11-30 18:50

        reference_values = pd.date_range(
            start=self.futures_start_dates[self.FUTURE_ASSET],
            end=futures_end_dt,
            freq="T"
        )

        for idx, dt in enumerate(window.index):
            date_val = reference_values.searchsorted(dt)
            self.assertEqual(offset + date_val,
                             window.iloc[idx][self.FUTURE_ASSET])

    def test_history_minute_blended(self):
        window = self.get_portal().get_history_window(
            [self.FUTURE_ASSET2, self.AAPL],
            pd.Timestamp("2014-03-21 20:00", tz='UTC'),
            200,
            "1m",
            "price"
        )

        # just a sanity check
        self.assertEqual(200, len(window[self.AAPL]))
        self.assertEqual(200, len(window[self.FUTURE_ASSET2]))

    def test_futures_history_daily(self):
        # get 3 days ending 11/30 10:00 am Eastern
        # = 11/25, 11/27 (half day), 11/30 (partial)

        window = self.get_portal().get_history_window(
            [self.env.asset_finder.retrieve_asset(self.FUTURE_ASSET)],
            pd.Timestamp("2015-11-30 15:00", tz='UTC'),
            3,
            "1d",
            "high"
        )

        self.assertEqual(3, len(window[self.FUTURE_ASSET]))

        np.testing.assert_array_equal([12929.0, 15629.0, 19769.0],
                                      window.values.T[0])

quantopian / zipline

Pull Request — master (#858)

tests.HistoryTestCase.test_daily_dividends() A

Complexity

Size

Duplication

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like