tests.risk.ensure_latest_answer_key() - Code Metrics - Inspection of "BLD: Replace `pre_setup` with a lazy cmdclass." - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#867)

by Joe

created 2015-12-17 16:50 UTC

tests.risk.ensure_latest_answer_key() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	6
dl	0
loc	48
rs	7.6529

#
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import hashlib
import os

import numpy as np
import pandas as pd
import pytz
import xlrd
import requests

from six.moves import map


def col_letter_to_index(col_letter):
    # Only supports single letter,
    # but answer key doesn't need multi-letter, yet.
    index = 0
    for i, char in enumerate(reversed(col_letter)):
        index += ((ord(char) - 65) + 1) * pow(26, i)
    return index

DIR = os.path.dirname(os.path.realpath(__file__))

ANSWER_KEY_CHECKSUMS_PATH = os.path.join(DIR, 'risk-answer-key-checksums')
ANSWER_KEY_CHECKSUMS = open(ANSWER_KEY_CHECKSUMS_PATH, 'r').read().splitlines()

ANSWER_KEY_FILENAME = 'risk-answer-key.xlsx'

ANSWER_KEY_PATH = os.path.join(DIR, ANSWER_KEY_FILENAME)

ANSWER_KEY_BUCKET_NAME = 'zipline-test_data'

ANSWER_KEY_DL_TEMPLATE = """
https://s3.amazonaws.com/zipline-test-data/risk/{md5}/risk-answer-key.xlsx
""".strip()

LATEST_ANSWER_KEY_URL = ANSWER_KEY_DL_TEMPLATE.format(
    md5=ANSWER_KEY_CHECKSUMS[-1])


def answer_key_signature():
    with open(ANSWER_KEY_PATH, 'rb') as f:
        md5 = hashlib.md5()
        buf = f.read(1024)
        md5.update(buf)
        while buf != b"":
            buf = f.read(1024)
            md5.update(buf)
    return md5.hexdigest()


def ensure_latest_answer_key():
    """
    Get the latest answer key from a publically available location.

    Logic for determining what and when to download is as such:

    - If there is no local spreadsheet file, then get the lastest answer key,
    as defined by the last row in the checksum file.
    - If there is a local spreadsheet file:
    -- If the spreadsheet's checksum is in the checksum file:
    --- If the spreadsheet's checksum does not match the latest, then grab the
    the latest checksum and replace the local checksum file.
    --- If the spreadsheet's checksum matches the latest, then skip download,
    and use the local spreadsheet as a cached copy.
    -- If the spreadsheet's checksum is not in the checksum file, then leave
    the local file alone, assuming that the local xls's md5 is not in the list
    due to local modifications during development.

    It is possible that md5's could collide, if that is ever case, we should
    then find an alternative naming scheme.

    The spreadsheet answer sheet is not kept in SCM, as every edit would
    increase the repo size by the file size, since it is treated as a binary.
    """

    answer_key_dl_checksum = None

    local_answer_key_exists = os.path.exists(ANSWER_KEY_PATH)
    if local_answer_key_exists:
        local_hash = answer_key_signature()

        if local_hash in ANSWER_KEY_CHECKSUMS:
            # Assume previously downloaded version.
            # Check for latest.
            if local_hash != ANSWER_KEY_CHECKSUMS[-1]:
                # More recent checksum, download
                answer_key_dl_checksum = ANSWER_KEY_CHECKSUMS[-1]
            else:
                # Assume local copy that is being developed on
                answer_key_dl_checksum = None
    else:
        answer_key_dl_checksum = ANSWER_KEY_CHECKSUMS[-1]

    if answer_key_dl_checksum:
        res = requests.get(
            ANSWER_KEY_DL_TEMPLATE.format(md5=answer_key_dl_checksum))
        with open(ANSWER_KEY_PATH, 'wb') as f:
            f.write(res.content)

# Get latest answer key on load.
ensure_latest_answer_key()


class DataIndex(object):
    """
    Coordinates for the spreadsheet, using the values as seen in the notebook.
    The python-excel libraries use 0 index, while the spreadsheet in a GUI
    uses a 1 index.
    """
    def __init__(self, sheet_name, col, row_start, row_end,
                 value_type='float'):
        self.sheet_name = sheet_name
        self.col = col
        self.row_start = row_start
        self.row_end = row_end
        self.value_type = value_type

    @property
    def col_index(self):
        return col_letter_to_index(self.col) - 1

    @property
    def row_start_index(self):
        return self.row_start - 1

    @property
    def row_end_index(self):
        return self.row_end - 1

    def __str__(self):
        return "'{sheet_name}'!{col}{row_start}:{col}{row_end}".format(
            sheet_name=self.sheet_name,
            col=self.col,
            row_start=self.row_start,
            row_end=self.row_end
        )


class AnswerKey(object):

    INDEXES = {
        'RETURNS': DataIndex('Sim Period', 'D', 4, 255),

        'BENCHMARK': {
            'Dates': DataIndex('s_p', 'A', 4, 254, value_type='date'),
            'Returns': DataIndex('s_p', 'H', 4, 254)
        },

        # Below matches the inconsistent capitalization in spreadsheet
        'BENCHMARK_PERIOD_RETURNS': {
            'Monthly': DataIndex('s_p', 'R', 8, 19),
            '3-Month': DataIndex('s_p', 'S', 10, 19),
            '6-month': DataIndex('s_p', 'T', 13, 19),
            'year': DataIndex('s_p', 'U', 19, 19),
        },

        'BENCHMARK_PERIOD_VOLATILITY': {
            'Monthly': DataIndex('s_p', 'V', 8, 19),
            '3-Month': DataIndex('s_p', 'W', 10, 19),
            '6-month': DataIndex('s_p', 'X', 13, 19),
            'year': DataIndex('s_p', 'Y', 19, 19),
        },

        'ALGORITHM_PERIOD_RETURNS': {
            'Monthly': DataIndex('Sim Period', 'Z', 23, 34),
            '3-Month': DataIndex('Sim Period', 'AA', 25, 34),
            '6-month': DataIndex('Sim Period', 'AB', 28, 34),
            'year': DataIndex('Sim Period', 'AC', 34, 34),
        },

        'ALGORITHM_PERIOD_VOLATILITY': {
            'Monthly': DataIndex('Sim Period', 'AH', 23, 34),
            '3-Month': DataIndex('Sim Period', 'AI', 25, 34),
            '6-month': DataIndex('Sim Period', 'AJ', 28, 34),
            'year': DataIndex('Sim Period', 'AK', 34, 34),
        },

        'ALGORITHM_PERIOD_SHARPE': {
            'Monthly': DataIndex('Sim Period', 'AL', 23, 34),
            '3-Month': DataIndex('Sim Period', 'AM', 25, 34),
            '6-month': DataIndex('Sim Period', 'AN', 28, 34),
            'year': DataIndex('Sim Period', 'AO', 34, 34),
        },

        'ALGORITHM_PERIOD_BETA': {
            'Monthly': DataIndex('Sim Period', 'AP', 23, 34),
            '3-Month': DataIndex('Sim Period', 'AQ', 25, 34),
            '6-month': DataIndex('Sim Period', 'AR', 28, 34),
            'year': DataIndex('Sim Period', 'AS', 34, 34),
        },

        'ALGORITHM_PERIOD_ALPHA': {
            'Monthly': DataIndex('Sim Period', 'AT', 23, 34),
            '3-Month': DataIndex('Sim Period', 'AU', 25, 34),
            '6-month': DataIndex('Sim Period', 'AV', 28, 34),
            'year': DataIndex('Sim Period', 'AW', 34, 34),
        },

        'ALGORITHM_PERIOD_BENCHMARK_VARIANCE': {
            'Monthly': DataIndex('Sim Period', 'BJ', 23, 34),
            '3-Month': DataIndex('Sim Period', 'BK', 25, 34),
            '6-month': DataIndex('Sim Period', 'BL', 28, 34),
            'year': DataIndex('Sim Period', 'BM', 34, 34),
        },

        'ALGORITHM_PERIOD_COVARIANCE': {
            'Monthly': DataIndex('Sim Period', 'BF', 23, 34),
            '3-Month': DataIndex('Sim Period', 'BG', 25, 34),
            '6-month': DataIndex('Sim Period', 'BH', 28, 34),
            'year': DataIndex('Sim Period', 'BI', 34, 34),
        },

        'ALGORITHM_PERIOD_DOWNSIDE_RISK': {
            'Monthly': DataIndex('Sim Period', 'BN', 23, 34),
            '3-Month': DataIndex('Sim Period', 'BO', 25, 34),
            '6-month': DataIndex('Sim Period', 'BP', 28, 34),
            'year': DataIndex('Sim Period', 'BQ', 34, 34),
        },

        'ALGORITHM_PERIOD_SORTINO': {
            'Monthly': DataIndex('Sim Period', 'BR', 23, 34),
            '3-Month': DataIndex('Sim Period', 'BS', 25, 34),
            '6-month': DataIndex('Sim Period', 'BT', 28, 34),
            'year': DataIndex('Sim Period', 'BU', 34, 34),
        },

        'ALGORITHM_RETURN_VALUES': DataIndex(
            'Sim Cumulative', 'D', 4, 254),

        'ALGORITHM_CUMULATIVE_VOLATILITY': DataIndex(
            'Sim Cumulative', 'P', 4, 254),

        'ALGORITHM_CUMULATIVE_SHARPE': DataIndex(
            'Sim Cumulative', 'R', 4, 254),

        'CUMULATIVE_DOWNSIDE_RISK': DataIndex(
            'Sim Cumulative', 'U', 4, 254),

        'CUMULATIVE_SORTINO': DataIndex(
            'Sim Cumulative', 'V', 4, 254),

        'CUMULATIVE_INFORMATION': DataIndex(
            'Sim Cumulative', 'AA', 4, 254),

        'CUMULATIVE_BETA': DataIndex(
            'Sim Cumulative', 'AD', 4, 254),

        'CUMULATIVE_ALPHA': DataIndex(
            'Sim Cumulative', 'AE', 4, 254),

        'CUMULATIVE_MAX_DRAWDOWN': DataIndex(
            'Sim Cumulative', 'AH', 4, 254),

    }

    def __init__(self):
        self.workbook = xlrd.open_workbook(ANSWER_KEY_PATH)

        self.sheets = {}
        self.sheets['Sim Period'] = self.workbook.sheet_by_name('Sim Period')
        self.sheets['Sim Cumulative'] = self.workbook.sheet_by_name(
            'Sim Cumulative')
        self.sheets['s_p'] = self.workbook.sheet_by_name('s_p')

        for name, index in self.INDEXES.items():
            if isinstance(index, dict):
                subvalues = {}
                for subkey, subindex in index.items():
                    subvalues[subkey] = self.get_values(subindex)
                setattr(self, name, subvalues)
            else:
                setattr(self, name, self.get_values(index))

    def parse_date_value(self, value):
        return xlrd.xldate_as_tuple(value, 0)

    def parse_float_value(self, value):
        return value if value != '' else np.nan

    def get_raw_values(self, data_index):
        return self.sheets[data_index.sheet_name].col_values(
            data_index.col_index,
            data_index.row_start_index,
            data_index.row_end_index + 1)

    @property
    def value_type_to_value_func(self):
        return {
            'float': self.parse_float_value,
            'date': self.parse_date_value,
        }

    def get_values(self, data_index):
        value_parser = self.value_type_to_value_func[data_index.value_type]
        return [value for value in
                map(value_parser, self.get_raw_values(data_index))]


ANSWER_KEY = AnswerKey()

BENCHMARK_DATES = ANSWER_KEY.BENCHMARK['Dates']
BENCHMARK_RETURNS = ANSWER_KEY.BENCHMARK['Returns']
DATES = [datetime.datetime(*x, tzinfo=pytz.UTC) for x in BENCHMARK_DATES]
BENCHMARK = pd.Series(dict(zip(DATES, BENCHMARK_RETURNS)))
ALGORITHM_RETURNS = pd.Series(
    dict(zip(DATES, ANSWER_KEY.ALGORITHM_RETURN_VALUES)))
RETURNS_DATA = pd.DataFrame({'Benchmark Returns': BENCHMARK,
                             'Algorithm Returns': ALGORITHM_RETURNS})
RISK_CUMULATIVE = pd.DataFrame({
    'volatility': pd.Series(dict(zip(
        DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_VOLATILITY))),
    'sharpe': pd.Series(dict(zip(
        DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_SHARPE))),
    'downside_risk': pd.Series(dict(zip(
        DATES, ANSWER_KEY.CUMULATIVE_DOWNSIDE_RISK))),
    'sortino': pd.Series(dict(zip(
        DATES, ANSWER_KEY.CUMULATIVE_SORTINO))),
    'information': pd.Series(dict(zip(
        DATES, ANSWER_KEY.CUMULATIVE_INFORMATION))),
    'alpha': pd.Series(dict(zip(
        DATES, ANSWER_KEY.CUMULATIVE_ALPHA))),
    'beta': pd.Series(dict(zip(
        DATES, ANSWER_KEY.CUMULATIVE_BETA))),
    'max_drawdown': pd.Series(dict(zip(
        DATES, ANSWER_KEY.CUMULATIVE_MAX_DRAWDOWN))),
})


1			#
2			# Copyright 2014 Quantopian, Inc.
3			#
4			# Licensed under the Apache License, Version 2.0 (the "License");
5			# you may not use this file except in compliance with the License.
6			# You may obtain a copy of the License at
7			#
8			# http://www.apache.org/licenses/LICENSE-2.0
9			#
10			# Unless required by applicable law or agreed to in writing, software
11			# distributed under the License is distributed on an "AS IS" BASIS,
12			# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13			# See the License for the specific language governing permissions and
14			# limitations under the License.
15			import datetime
16			import hashlib
17			import os
18
19			import numpy as np
20			import pandas as pd
21			import pytz
22			import xlrd
23			import requests
24
25			from six.moves import map
26
27
28			def col_letter_to_index(col_letter):
29			# Only supports single letter,
30			# but answer key doesn't need multi-letter, yet.
31			index = 0
32			for i, char in enumerate(reversed(col_letter)):
33			index += ((ord(char) - 65) + 1) * pow(26, i)
34			return index
35
36			DIR = os.path.dirname(os.path.realpath(__file__))
37
38			ANSWER_KEY_CHECKSUMS_PATH = os.path.join(DIR, 'risk-answer-key-checksums')
39			ANSWER_KEY_CHECKSUMS = open(ANSWER_KEY_CHECKSUMS_PATH, 'r').read().splitlines()
40
41			ANSWER_KEY_FILENAME = 'risk-answer-key.xlsx'
42
43			ANSWER_KEY_PATH = os.path.join(DIR, ANSWER_KEY_FILENAME)
44
45			ANSWER_KEY_BUCKET_NAME = 'zipline-test_data'
46
47			ANSWER_KEY_DL_TEMPLATE = """
48			https://s3.amazonaws.com/zipline-test-data/risk/{md5}/risk-answer-key.xlsx
49			""".strip()
50
51			LATEST_ANSWER_KEY_URL = ANSWER_KEY_DL_TEMPLATE.format(
52			md5=ANSWER_KEY_CHECKSUMS[-1])
53
54
55			def answer_key_signature():
56			with open(ANSWER_KEY_PATH, 'rb') as f:
57			md5 = hashlib.md5()
58			buf = f.read(1024)
59			md5.update(buf)
60			while buf != b"":
61			buf = f.read(1024)
62			md5.update(buf)
63			return md5.hexdigest()
64
65
66			def ensure_latest_answer_key():
67			"""
68			Get the latest answer key from a publically available location.
69
70			Logic for determining what and when to download is as such:
71
72			- If there is no local spreadsheet file, then get the lastest answer key,
73			as defined by the last row in the checksum file.
74			- If there is a local spreadsheet file:
75			-- If the spreadsheet's checksum is in the checksum file:
76			--- If the spreadsheet's checksum does not match the latest, then grab the
77			the latest checksum and replace the local checksum file.
78			--- If the spreadsheet's checksum matches the latest, then skip download,
79			and use the local spreadsheet as a cached copy.
80			-- If the spreadsheet's checksum is not in the checksum file, then leave
81			the local file alone, assuming that the local xls's md5 is not in the list
82			due to local modifications during development.
83
84			It is possible that md5's could collide, if that is ever case, we should
85			then find an alternative naming scheme.
86
87			The spreadsheet answer sheet is not kept in SCM, as every edit would
88			increase the repo size by the file size, since it is treated as a binary.
89			"""
90
91			answer_key_dl_checksum = None
92
93			local_answer_key_exists = os.path.exists(ANSWER_KEY_PATH)
94			if local_answer_key_exists:
95			local_hash = answer_key_signature()
96
97			if local_hash in ANSWER_KEY_CHECKSUMS:
98			# Assume previously downloaded version.
99			# Check for latest.
100			if local_hash != ANSWER_KEY_CHECKSUMS[-1]:
101			# More recent checksum, download
102			answer_key_dl_checksum = ANSWER_KEY_CHECKSUMS[-1]
103			else:
104			# Assume local copy that is being developed on
105			answer_key_dl_checksum = None
106			else:
107			answer_key_dl_checksum = ANSWER_KEY_CHECKSUMS[-1]
108
109			if answer_key_dl_checksum:
110			res = requests.get(
111			ANSWER_KEY_DL_TEMPLATE.format(md5=answer_key_dl_checksum))
112			with open(ANSWER_KEY_PATH, 'wb') as f:
113			f.write(res.content)
114
115			# Get latest answer key on load.
116			ensure_latest_answer_key()
117
118
119			class DataIndex(object):
120			"""
121			Coordinates for the spreadsheet, using the values as seen in the notebook.
122			The python-excel libraries use 0 index, while the spreadsheet in a GUI
123			uses a 1 index.
124			"""
125			def __init__(self, sheet_name, col, row_start, row_end,
126			value_type='float'):
127			self.sheet_name = sheet_name
128			self.col = col
129			self.row_start = row_start
130			self.row_end = row_end
131			self.value_type = value_type
132
133			@property
134			def col_index(self):
135			return col_letter_to_index(self.col) - 1
136
137			@property
138			def row_start_index(self):
139			return self.row_start - 1
140
141			@property
142			def row_end_index(self):
143			return self.row_end - 1
144
145			def __str__(self):
146			return "'{sheet_name}'!{col}{row_start}:{col}{row_end}".format(
147			sheet_name=self.sheet_name,
148			col=self.col,
149			row_start=self.row_start,
150			row_end=self.row_end
151			)
152
153
154			class AnswerKey(object):
155
156			INDEXES = {
157			'RETURNS': DataIndex('Sim Period', 'D', 4, 255),
158
159			'BENCHMARK': {
160			'Dates': DataIndex('s_p', 'A', 4, 254, value_type='date'),
161			'Returns': DataIndex('s_p', 'H', 4, 254)
162			},
163
164			# Below matches the inconsistent capitalization in spreadsheet
165			'BENCHMARK_PERIOD_RETURNS': {
166			'Monthly': DataIndex('s_p', 'R', 8, 19),
167			'3-Month': DataIndex('s_p', 'S', 10, 19),
168			'6-month': DataIndex('s_p', 'T', 13, 19),
169			'year': DataIndex('s_p', 'U', 19, 19),
170			},
171
172			'BENCHMARK_PERIOD_VOLATILITY': {
173			'Monthly': DataIndex('s_p', 'V', 8, 19),
174			'3-Month': DataIndex('s_p', 'W', 10, 19),
175			'6-month': DataIndex('s_p', 'X', 13, 19),
176			'year': DataIndex('s_p', 'Y', 19, 19),
177			},
178
179			'ALGORITHM_PERIOD_RETURNS': {
180			'Monthly': DataIndex('Sim Period', 'Z', 23, 34),
181			'3-Month': DataIndex('Sim Period', 'AA', 25, 34),
182			'6-month': DataIndex('Sim Period', 'AB', 28, 34),
183			'year': DataIndex('Sim Period', 'AC', 34, 34),
184			},
185
186			'ALGORITHM_PERIOD_VOLATILITY': {
187			'Monthly': DataIndex('Sim Period', 'AH', 23, 34),
188			'3-Month': DataIndex('Sim Period', 'AI', 25, 34),
189			'6-month': DataIndex('Sim Period', 'AJ', 28, 34),
190			'year': DataIndex('Sim Period', 'AK', 34, 34),
191			},
192
193			'ALGORITHM_PERIOD_SHARPE': {
194			'Monthly': DataIndex('Sim Period', 'AL', 23, 34),
195			'3-Month': DataIndex('Sim Period', 'AM', 25, 34),
196			'6-month': DataIndex('Sim Period', 'AN', 28, 34),
197			'year': DataIndex('Sim Period', 'AO', 34, 34),
198			},
199
200			'ALGORITHM_PERIOD_BETA': {
201			'Monthly': DataIndex('Sim Period', 'AP', 23, 34),
202			'3-Month': DataIndex('Sim Period', 'AQ', 25, 34),
203			'6-month': DataIndex('Sim Period', 'AR', 28, 34),
204			'year': DataIndex('Sim Period', 'AS', 34, 34),
205			},
206
207			'ALGORITHM_PERIOD_ALPHA': {
208			'Monthly': DataIndex('Sim Period', 'AT', 23, 34),
209			'3-Month': DataIndex('Sim Period', 'AU', 25, 34),
210			'6-month': DataIndex('Sim Period', 'AV', 28, 34),
211			'year': DataIndex('Sim Period', 'AW', 34, 34),
212			},
213
214			'ALGORITHM_PERIOD_BENCHMARK_VARIANCE': {
215			'Monthly': DataIndex('Sim Period', 'BJ', 23, 34),
216			'3-Month': DataIndex('Sim Period', 'BK', 25, 34),
217			'6-month': DataIndex('Sim Period', 'BL', 28, 34),
218			'year': DataIndex('Sim Period', 'BM', 34, 34),
219			},
220
221			'ALGORITHM_PERIOD_COVARIANCE': {
222			'Monthly': DataIndex('Sim Period', 'BF', 23, 34),
223			'3-Month': DataIndex('Sim Period', 'BG', 25, 34),
224			'6-month': DataIndex('Sim Period', 'BH', 28, 34),
225			'year': DataIndex('Sim Period', 'BI', 34, 34),
226			},
227
228			'ALGORITHM_PERIOD_DOWNSIDE_RISK': {
229			'Monthly': DataIndex('Sim Period', 'BN', 23, 34),
230			'3-Month': DataIndex('Sim Period', 'BO', 25, 34),
231			'6-month': DataIndex('Sim Period', 'BP', 28, 34),
232			'year': DataIndex('Sim Period', 'BQ', 34, 34),
233			},
234
235			'ALGORITHM_PERIOD_SORTINO': {
236			'Monthly': DataIndex('Sim Period', 'BR', 23, 34),
237			'3-Month': DataIndex('Sim Period', 'BS', 25, 34),
238			'6-month': DataIndex('Sim Period', 'BT', 28, 34),
239			'year': DataIndex('Sim Period', 'BU', 34, 34),
240			},
241
242			'ALGORITHM_RETURN_VALUES': DataIndex(
243			'Sim Cumulative', 'D', 4, 254),
244
245			'ALGORITHM_CUMULATIVE_VOLATILITY': DataIndex(
246			'Sim Cumulative', 'P', 4, 254),
247
248			'ALGORITHM_CUMULATIVE_SHARPE': DataIndex(
249			'Sim Cumulative', 'R', 4, 254),
250
251			'CUMULATIVE_DOWNSIDE_RISK': DataIndex(
252			'Sim Cumulative', 'U', 4, 254),
253
254			'CUMULATIVE_SORTINO': DataIndex(
255			'Sim Cumulative', 'V', 4, 254),
256
257			'CUMULATIVE_INFORMATION': DataIndex(
258			'Sim Cumulative', 'AA', 4, 254),
259
260			'CUMULATIVE_BETA': DataIndex(
261			'Sim Cumulative', 'AD', 4, 254),
262
263			'CUMULATIVE_ALPHA': DataIndex(
264			'Sim Cumulative', 'AE', 4, 254),
265
266			'CUMULATIVE_MAX_DRAWDOWN': DataIndex(
267			'Sim Cumulative', 'AH', 4, 254),
268
269			}
270
271			def __init__(self):
272			self.workbook = xlrd.open_workbook(ANSWER_KEY_PATH)
273
274			self.sheets = {}
275			self.sheets['Sim Period'] = self.workbook.sheet_by_name('Sim Period')
276			self.sheets['Sim Cumulative'] = self.workbook.sheet_by_name(
277			'Sim Cumulative')
278			self.sheets['s_p'] = self.workbook.sheet_by_name('s_p')
279
280			for name, index in self.INDEXES.items():
281			if isinstance(index, dict):
282			subvalues = {}
283			for subkey, subindex in index.items():
284			subvalues[subkey] = self.get_values(subindex)
285			setattr(self, name, subvalues)
286			else:
287			setattr(self, name, self.get_values(index))
288
289			def parse_date_value(self, value):
290			return xlrd.xldate_as_tuple(value, 0)
291
292			def parse_float_value(self, value):
293			return value if value != '' else np.nan
294
295			def get_raw_values(self, data_index):
296			return self.sheets[data_index.sheet_name].col_values(
297			data_index.col_index,
298			data_index.row_start_index,
299			data_index.row_end_index + 1)
300
301			@property
302			def value_type_to_value_func(self):
303			return {
304			'float': self.parse_float_value,
305			'date': self.parse_date_value,
306			}
307
308			def get_values(self, data_index):
309			value_parser = self.value_type_to_value_func[data_index.value_type]
310			return [value for value in
311			map(value_parser, self.get_raw_values(data_index))]
312
313
314			ANSWER_KEY = AnswerKey()
315
316			BENCHMARK_DATES = ANSWER_KEY.BENCHMARK['Dates']
317			BENCHMARK_RETURNS = ANSWER_KEY.BENCHMARK['Returns']
318			DATES = [datetime.datetime(*x, tzinfo=pytz.UTC) for x in BENCHMARK_DATES]
319			BENCHMARK = pd.Series(dict(zip(DATES, BENCHMARK_RETURNS)))
320			ALGORITHM_RETURNS = pd.Series(
321			dict(zip(DATES, ANSWER_KEY.ALGORITHM_RETURN_VALUES)))
322			RETURNS_DATA = pd.DataFrame({'Benchmark Returns': BENCHMARK,
323			'Algorithm Returns': ALGORITHM_RETURNS})
324			RISK_CUMULATIVE = pd.DataFrame({
325			'volatility': pd.Series(dict(zip(
326			DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_VOLATILITY))),
327			'sharpe': pd.Series(dict(zip(
328			DATES, ANSWER_KEY.ALGORITHM_CUMULATIVE_SHARPE))),
329			'downside_risk': pd.Series(dict(zip(
330			DATES, ANSWER_KEY.CUMULATIVE_DOWNSIDE_RISK))),
331			'sortino': pd.Series(dict(zip(
332			DATES, ANSWER_KEY.CUMULATIVE_SORTINO))),
333			'information': pd.Series(dict(zip(
334			DATES, ANSWER_KEY.CUMULATIVE_INFORMATION))),
335			'alpha': pd.Series(dict(zip(
336			DATES, ANSWER_KEY.CUMULATIVE_ALPHA))),
337			'beta': pd.Series(dict(zip(
338			DATES, ANSWER_KEY.CUMULATIVE_BETA))),
339			'max_drawdown': pd.Series(dict(zip(
340			DATES, ANSWER_KEY.CUMULATIVE_MAX_DRAWDOWN))),
341			})
342

quantopian / zipline

Pull Request — master (#867)

tests.risk.ensure_latest_answer_key() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like