zipline.utils.run_pipeline() - Code Metrics - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

zipline.utils.run_pipeline() F
last analyzed 2016-01-08 22:03 UTC

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

108

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	18
dl	0
loc	108
rs	2

How to fix Long Method Complexity

#
# Copyright 2014 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import os
import argparse
from copy import copy

from six import print_
from six.moves import configparser
import pandas as pd

# pygments is an optional dependency: it is only used to syntax-highlight
# the algorithm source when run_pipeline() echoes it to the terminal.
try:
    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import TerminalFormatter
    PYGMENTS = True
except ImportError:
    # Only a missing package should disable highlighting.  A bare
    # ``except:`` would also swallow KeyboardInterrupt/SystemExit and
    # hide unrelated errors raised while importing.
    PYGMENTS = False

import zipline
from zipline.errors import NoSourceError, PipelineDateError

# Fallback settings used by parse_args() when neither the config file nor
# the command line supplies a value.  Every value is a string; for example
# capital_base is only converted with float() inside run_pipeline().
DEFAULTS = dict(
    data_frequency='daily',
    capital_base='10e6',
    source='yahoo',
    symbols='AAPL',
    metadata_index='symbol',
    source_time_column='Date',
)


def parse_args(argv, ipython_mode=False):
    """Parse a list of command line arguments into a settings dict.

    Settings are resolved in three layers, later layers winning:

    1. the module-level ``DEFAULTS`` dict,
    2. the ``[Defaults]`` section of the config file given via
       ``-c``/``--conf_file`` (if any),
    3. options supplied directly in ``argv``.

    Arguments:
        * argv : list of strings
            List of arguments, e.g. ['-c', 'my.conf']
        * ipython_mode : bool <default=False>
            Whether to also accept IPython specific arguments
            like --local_namespace

    Returns:
        dict mapping each option's destination name to its value
        (``vars()`` of the parsed argparse namespace).

    Notes:
    Invalid arguments are reported by argparse, which prints a usage
    message and raises SystemExit.

    """
    # First pass: only look for the config file option.  This parser is
    # built with add_help=False so that -h is left for the full parser
    # below, which knows about every option.
    conf_parser = argparse.ArgumentParser(
        # Don't mess with format of description
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False
    )
    conf_parser.add_argument("-c", "--conf_file",
                             help="Specify config file",
                             metavar="FILE")
    args, remaining_argv = conf_parser.parse_known_args(argv)

    defaults = copy(DEFAULTS)

    if args.conf_file:
        # SafeConfigParser is the class to use on Python 2.  On Python 3
        # it was only a deprecated alias of ConfigParser and no longer
        # exists as of Python 3.12, so pick the class by major version.
        if sys.version_info[0] == 2:
            config = configparser.SafeConfigParser()
        else:
            config = configparser.ConfigParser()
        config.read([args.conf_file])
        defaults.update(dict(config.items("Defaults")))

    # Second pass: parse everything else.  Help is enabled here so that
    # -h lists all options, including -c inherited from conf_parser.
    parser = argparse.ArgumentParser(
        description="Zipline version %s." % zipline.__version__,
        parents=[conf_parser]
    )

    # Must happen before add_argument() so that each option picks up its
    # layered default.
    parser.set_defaults(**defaults)

    parser.add_argument('--algofile', '-f')
    parser.add_argument('--data-frequency',
                        choices=('minute', 'daily'))
    parser.add_argument('--start', '-s')
    parser.add_argument('--end', '-e')
    parser.add_argument('--capital_base')
    parser.add_argument('--source', '-d', choices=('yahoo',))
    parser.add_argument('--source_time_column', '-t')
    parser.add_argument('--symbols')
    parser.add_argument('--output', '-o')
    parser.add_argument('--metadata_path', '-m')
    parser.add_argument('--metadata_index', '-x')
    # Two flags share one destination so the echo can be switched on or
    # off explicitly.
    parser.add_argument('--print-algo', '-p', dest='print_algo',
                        action='store_true')
    parser.add_argument('--no-print-algo', '-q', dest='print_algo',
                        action='store_false')

    if ipython_mode:
        parser.add_argument('--local_namespace', action='store_true')

    args = parser.parse_args(remaining_argv)

    return vars(args)


def parse_cell_magic(line, cell):
    """Run the algorithm contained in an IPython cell.

    Arguments:
        * line : str
            The text following the magic name; parsed like command
            line arguments by parse_args() in IPython mode.
        * cell : str
            The cell body, used as the algorithm source text.

    The algorithm is executed inside the IPython user namespace unless
    --local_namespace is given.  If --output is given, the performance
    result is stored in the IPython user namespace under that name
    instead of being written to a file.  Nothing is returned.
    """
    # split() without a separator discards empty tokens.  Splitting on a
    # literal ' ' turns an empty magic line into [''] (and repeated
    # spaces into '' entries), which argparse rejects as an unrecognized
    # argument.
    args_list = line.split()
    args = parse_args(args_list, ipython_mode=True)

    # Drop the parsed print_algo value; it is passed explicitly below.
    args.pop('print_algo')

    local_namespace = args.pop('local_namespace', False)
    # By default, execute inside IPython namespace
    if not local_namespace:
        args['namespace'] = get_ipython().user_ns  # flake8: noqa

    # If we are running inside NB, do not output to file but create a
    # variable instead.  Popping 'output' keeps run_pipeline() from
    # pickling the result to disk.
    output_var_name = args.pop('output', None)

    perf = run_pipeline(print_algo=False, algo_text=cell, **args)

    if output_var_name is not None:
        get_ipython().user_ns[output_var_name] = perf  # flake8: noqa


def run_pipeline(print_algo=True, **kwargs):
    """Runs a full zipline pipeline given configuration keyword
    arguments.

    1. Load data (start and end dates can be provided as strings, as
    well as the source and symbols).

    2. Instantiate algorithm (supply either algo_text or algofile
    kwargs; when algo_text is missing the script is read from
    algofile).

    3. Run algorithm (capital_base is converted with float()).

    4. Return performance dataframe (also pickled to the ``output``
    path when one is given).

    :Arguments:
        * print_algo : bool <default=True>
           Whether to print the algorithm to command line. Will use
           pygments syntax coloring if pygments is found.
        * kwargs
           Settings as produced by parse_args().  ``symbols``,
           ``metadata_index``, ``source_time_column`` and
           ``capital_base`` are required; ``start``, ``end``,
           ``metadata_path`` and ``source`` are treated as None when
           omitted.

    :Raises:
        * PipelineDateError if only one of start/end is provided.
        * NoSourceError if no source is provided.
        * NotImplementedError if the source is neither 'yahoo' nor an
          existing file or directory.

    """
    start, end = _parse_date_bounds(kwargs.get('start'), kwargs.get('end'))

    # Without explicit bounds the algorithm is asked to take its
    # simulation start and end from the data source.
    overwrite_sim_params = start is None

    symbols = kwargs['symbols'].split(',')

    asset_metadata = _load_asset_metadata(
        kwargs.get('asset_metadata', None),
        kwargs.get('metadata_path'),
        kwargs['metadata_index']
    )

    source = _load_source(kwargs.get('source'),
                          kwargs['source_time_column'],
                          symbols, start, end)

    algo_text = kwargs.get('algo_text', None)
    if algo_text is None:
        # Expect algofile to be set
        with open(kwargs['algofile'], 'r') as fd:
            algo_text = fd.read()

    if print_algo:
        _echo_algo(algo_text)

    algo = zipline.TradingAlgorithm(script=algo_text,
                                    namespace=kwargs.get('namespace', {}),
                                    capital_base=float(kwargs['capital_base']),
                                    algo_filename=kwargs.get('algofile'),
                                    equities_metadata=asset_metadata,
                                    start=start,
                                    end=end)

    perf = algo.run(source, overwrite_sim_params=overwrite_sim_params)

    output_fname = kwargs.get('output', None)
    if output_fname is not None:
        perf.to_pickle(output_fname)

    return perf


def _parse_date_bounds(start, end):
    """Convert the given bounds to UTC timestamps; both or neither must be set."""
    # Compare against None because strings/timestamps may have been given
    if start is not None:
        start = pd.Timestamp(start, tz='UTC')
    if end is not None:
        end = pd.Timestamp(end, tz='UTC')

    # Fail out if only one bound is provided
    if (start is None) != (end is None):
        raise PipelineDateError(start=start, end=end)

    return start, end


def _load_asset_metadata(asset_metadata, metadata_path, index_col):
    """Return metadata read from the CSV at metadata_path, else asset_metadata."""
    # A path that is not an existing file is ignored and the metadata
    # passed in (possibly None) is used unchanged.
    if metadata_path is not None and os.path.isfile(metadata_path):
        return pd.read_csv(metadata_path, index_col=index_col)
    return asset_metadata


def _load_source(source_arg, time_column, symbols, start, end):
    """Load price data from yahoo, a CSV file or a folder of CSV files."""
    if source_arg is None:
        raise NoSourceError()

    if source_arg == 'yahoo':
        return zipline.data.load_bars_from_yahoo(
            stocks=symbols, start=start, end=end)

    if os.path.isfile(source_arg):
        return zipline.data.load_prices_from_csv(
            filepath=source_arg,
            identifier_col=time_column
        )

    if os.path.isdir(source_arg):
        return zipline.data.load_prices_from_csv_folder(
            folderpath=source_arg,
            identifier_col=time_column
        )

    raise NotImplementedError(
        'Source %s not implemented.' % source_arg)


def _echo_algo(algo_text):
    """Print the algorithm source to stdout, highlighted if pygments is available."""
    if PYGMENTS:
        highlight(algo_text, PythonLexer(), TerminalFormatter(),
                  outfile=sys.stdout)
    else:
        print_(algo_text)


1			#
2			# Copyright 2014 Quantopian, Inc.
3			#
4			# Licensed under the Apache License, Version 2.0 (the "License");
5			# you may not use this file except in compliance with the License.
6			# You may obtain a copy of the License at
7			#
8			# http://www.apache.org/licenses/LICENSE-2.0
9			#
10			# Unless required by applicable law or agreed to in writing, software
11			# distributed under the License is distributed on an "AS IS" BASIS,
12			# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13			# See the License for the specific language governing permissions and
14			# limitations under the License.
15
16			import sys
17			import os
18			import argparse
19			from copy import copy
20
21			from six import print_
22			from six.moves import configparser
23			import pandas as pd
24
25			try:
26			from pygments import highlight
27			from pygments.lexers import PythonLexer
28			from pygments.formatters import TerminalFormatter
29			PYGMENTS = True
30			except:
31			PYGMENTS = False
32
33			import zipline
34			from zipline.errors import NoSourceError, PipelineDateError
35
36			DEFAULTS = {
37			'data_frequency': 'daily',
38			'capital_base': '10e6',
39			'source': 'yahoo',
40			'symbols': 'AAPL',
41			'metadata_index': 'symbol',
42			'source_time_column': 'Date',
43			}
44
45
46			def parse_args(argv, ipython_mode=False):
47			"""Parse list of arguments.
48
49			If a config file is provided (via -c), it will read in the
50			supplied options and overwrite any global defaults.
51
52			All other directly supplied arguments will overwrite the config
53			file settings.
54
55			Arguments:
56			* argv : list of strings
57			List of arguments, e.g. ['-c', 'my.conf']
58			* ipython_mode : bool <default=True>
59			Whether to parse IPython specific arguments
60			like --local_namespace
61
62			Notes:
63			Default settings can be found in zipline.utils.cli.DEFAULTS.
64
65			"""
66			# Parse any conf_file specification
67			# We make this parser with add_help=False so that
68			# it doesn't parse -h and print help.
69			conf_parser = argparse.ArgumentParser(
70			# Don't mess with format of description
71			formatter_class=argparse.RawDescriptionHelpFormatter,
72			# Turn off help, so we print all options in response to -h
73			add_help=False
74			)
75			conf_parser.add_argument("-c", "--conf_file",
76			help="Specify config file",
77			metavar="FILE")
78			args, remaining_argv = conf_parser.parse_known_args(argv)
79
80			defaults = copy(DEFAULTS)
81
82			if args.conf_file:
83			config = configparser.SafeConfigParser()
84			config.read([args.conf_file])
85			defaults.update(dict(config.items("Defaults")))
86
87			# Parse rest of arguments
88			# Don't suppress add_help here so it will handle -h
89			parser = argparse.ArgumentParser(
90			# Inherit options from config_parser
91			description="Zipline version %s." % zipline.__version__,
92			parents=[conf_parser]
93			)
94
95			parser.set_defaults(**defaults)
96
97			parser.add_argument('--algofile', '-f')
98			parser.add_argument('--data-frequency',
99			choices=('minute', 'daily'))
100			parser.add_argument('--start', '-s')
101			parser.add_argument('--end', '-e')
102			parser.add_argument('--capital_base')
103			parser.add_argument('--source', '-d', choices=('yahoo',))
104			parser.add_argument('--source_time_column', '-t')
105			parser.add_argument('--symbols')
106			parser.add_argument('--output', '-o')
107			parser.add_argument('--metadata_path', '-m')
108			parser.add_argument('--metadata_index', '-x')
109			parser.add_argument('--print-algo', '-p', dest='print_algo',
110			action='store_true')
111			parser.add_argument('--no-print-algo', '-q', dest='print_algo',
112			action='store_false')
113
114			if ipython_mode:
115			parser.add_argument('--local_namespace', action='store_true')
116
117			args = parser.parse_args(remaining_argv)
118
119			return(vars(args))
120
121
122			def parse_cell_magic(line, cell):
123			"""Parse IPython magic
124			"""
125			args_list = line.split(' ')
126			args = parse_args(args_list, ipython_mode=True)
127
128			# Remove print_algo kwarg to overwrite below.
129			args.pop('print_algo')
130
131			local_namespace = args.pop('local_namespace', False)
132			# By default, execute inside IPython namespace
133			if not local_namespace:
134			args['namespace'] = get_ipython().user_ns # flake8: noqa
135
136			# If we are running inside NB, do not output to file but create a
137			# variable instead
138			output_var_name = args.pop('output', None)
139
140			perf = run_pipeline(print_algo=False, algo_text=cell, **args)
141
142			if output_var_name is not None:
143			get_ipython().user_ns[output_var_name] = perf # flake8: noqa
144
145
146			def run_pipeline(print_algo=True, **kwargs):
147			"""Runs a full zipline pipeline given configuration keyword
148			arguments.
149
150			1. Load data (start and end dates can be provided a strings as
151			well as the source and symobls).
152
153			2. Instantiate algorithm (supply either algo_text or algofile
154			kwargs containing initialize() and handle_data() functions). If
155			algofile is supplied, will try to look for algofile_analyze.py and
156			append it.
157
158			3. Run algorithm (supply capital_base as float).
159
160			4. Return performance dataframe.
161
162			:Arguments:
163			* print_algo : bool <default=True>
164			Whether to print the algorithm to command line. Will use
165			pygments syntax coloring if pygments is found.
166
167			"""
168			start = kwargs['start']
169			end = kwargs['end']
170			# Compare against None because strings/timestamps may have been given
171			if start is not None:
172			start = pd.Timestamp(start, tz='UTC')
173			if end is not None:
174			end = pd.Timestamp(end, tz='UTC')
175
176			# Fail out if only one bound is provided
177			if ((start is None) or (end is None)) and (start != end):
178			raise PipelineDateError(start=start, end=end)
179
180			# Check if start and end are provided, and if the sim_params need to read
181			# a start and end from the DataSource
182			if start is None:
183			overwrite_sim_params = True
184			else:
185			overwrite_sim_params = False
186
187			symbols = kwargs['symbols'].split(',')
188			asset_identifier = kwargs['metadata_index']
189
190			# Pull asset metadata
191			asset_metadata = kwargs.get('asset_metadata', None)
192			asset_metadata_path = kwargs['metadata_path']
193			# Read in a CSV file, if applicable
194			if asset_metadata_path is not None:
195			if os.path.isfile(asset_metadata_path):
196			asset_metadata = pd.read_csv(asset_metadata_path,
197			index_col=asset_identifier)
198
199			source_arg = kwargs['source']
200			source_time_column = kwargs['source_time_column']
201
202			if source_arg is None:
203			raise NoSourceError()
204
205			elif source_arg == 'yahoo':
206			source = zipline.data.load_bars_from_yahoo(
207			stocks=symbols, start=start, end=end)
208
209			elif os.path.isfile(source_arg):
210			source = zipline.data.load_prices_from_csv(
211			filepath=source_arg,
212			identifier_col=source_time_column
213			)
214
215			elif os.path.isdir(source_arg):
216			source = zipline.data.load_prices_from_csv_folder(
217			folderpath=source_arg,
218			identifier_col=source_time_column
219			)
220
221			else:
222			raise NotImplementedError(
223			'Source %s not implemented.' % kwargs['source'])
224
225			algo_text = kwargs.get('algo_text', None)
226			if algo_text is None:
227			# Expect algofile to be set
228			algo_fname = kwargs['algofile']
229			with open(algo_fname, 'r') as fd:
230			algo_text = fd.read()
231
232			if print_algo:
233			if PYGMENTS:
234			highlight(algo_text, PythonLexer(), TerminalFormatter(),
235			outfile=sys.stdout)
236			else:
237			print_(algo_text)
238
239			algo = zipline.TradingAlgorithm(script=algo_text,
240			namespace=kwargs.get('namespace', {}),
241			capital_base=float(kwargs['capital_base']),
242			algo_filename=kwargs.get('algofile'),
243			equities_metadata=asset_metadata,
244			start=start,
245			end=end)
246
247			perf = algo.run(source, overwrite_sim_params=overwrite_sim_params)
248
249			output_fname = kwargs.get('output', None)
250			if output_fname is not None:
251			perf.to_pickle(output_fname)
252
253			return perf
254

quantopian / zipline

zipline.utils.run_pipeline() F last analyzed 2016-01-08 22:03 UTC

Complexity

Size

Duplication

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

zipline.utils.run_pipeline() F
last analyzed 2016-01-08 22:03 UTC