zipline.utils.run_pipeline()   F
last analyzed

Complexity

Conditions 18

Size

Total Lines 108

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 18
dl 0
loc 108
rs 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

A commonly applied refactoring for long methods is Extract Method.

Complexity

Complex units like zipline.utils.run_pipeline() (a function, in this case) often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields, methods, or local variables that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#
2
# Copyright 2014 Quantopian, Inc.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
16
import sys
17
import os
18
import argparse
19
from copy import copy
20
21
from six import print_
22
from six.moves import configparser
23
import pandas as pd
24
25
# Pygments is an optional dependency used only to syntax-highlight the
# algorithm source when it is printed to the terminal.
try:
    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import TerminalFormatter
    PYGMENTS = True
except ImportError:
    # Only a missing package should disable highlighting; a bare except
    # would also swallow KeyboardInterrupt/SystemExit and real bugs.
    PYGMENTS = False
32
33
import zipline
34
from zipline.errors import NoSourceError, PipelineDateError
35
36
# Baseline option values used by parse_args().  A config file's
# [Defaults] section overrides these, and explicit command line flags
# override both.  Values are kept as strings (as they would arrive from
# the command line); e.g. run_pipeline() converts capital_base with
# float().
DEFAULTS = {
    'data_frequency': 'daily',
    'capital_base': '10e6',
    'source': 'yahoo',
    'symbols': 'AAPL',
    'metadata_index': 'symbol',
    'source_time_column': 'Date',
}
44
45
46
def parse_args(argv, ipython_mode=False):
    """Parse list of arguments.

    If a config file is provided (via -c), it will read in the
    supplied options and overwrite any global defaults.

    All other directly supplied arguments will overwrite the config
    file settings.

    Arguments:
        * argv : list of strings
            List of arguments, e.g. ['-c', 'my.conf']
        * ipython_mode : bool <default=False>
            Whether to parse IPython specific arguments
            like --local_namespace

    Returns:
        dict mapping each option's destination name (e.g. 'algofile',
        'start', 'print_algo') to its parsed value.  Options that were
        not supplied and have no default map to None.

    Notes:
    Default settings can be found in zipline.utils.cli.DEFAULTS.

    """
    # Parse any conf_file specification first.
    # We make this parser with add_help=False so that
    # it doesn't parse -h and print help.
    conf_parser = argparse.ArgumentParser(
        # Don't mess with format of description
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Turn off help, so we print all options in response to -h
        add_help=False
    )
    conf_parser.add_argument("-c", "--conf_file",
                             help="Specify config file",
                             metavar="FILE")
    args, remaining_argv = conf_parser.parse_known_args(argv)

    # Work on a copy so the module-level DEFAULTS is never mutated.
    defaults = copy(DEFAULTS)

    if args.conf_file:
        # SafeConfigParser was renamed to ConfigParser in Python 3.2 and
        # the old name was removed in Python 3.12; on Python 2 the safe
        # variant is only available under the old name.
        if sys.version_info[0] >= 3:
            config = configparser.ConfigParser()
        else:
            config = configparser.SafeConfigParser()
        # read() silently skips files it cannot open; in that case the
        # items() call below raises NoSectionError.
        config.read([args.conf_file])
        defaults.update(dict(config.items("Defaults")))

    # Parse rest of arguments
    # Don't suppress add_help here so it will handle -h
    parser = argparse.ArgumentParser(
        description="Zipline version %s." % zipline.__version__,
        # Inherit options from conf_parser
        parents=[conf_parser]
    )

    # Must be called before the add_argument() calls below so that each
    # option picks up its parser-level default.
    parser.set_defaults(**defaults)

    parser.add_argument('--algofile', '-f')
    parser.add_argument('--data-frequency',
                        choices=('minute', 'daily'))
    parser.add_argument('--start', '-s')
    parser.add_argument('--end', '-e')
    parser.add_argument('--capital_base')
    parser.add_argument('--source', '-d', choices=('yahoo',))
    parser.add_argument('--source_time_column', '-t')
    parser.add_argument('--symbols')
    parser.add_argument('--output', '-o')
    parser.add_argument('--metadata_path', '-m')
    parser.add_argument('--metadata_index', '-x')
    # Both flags write to the same destination so the last one given wins.
    parser.add_argument('--print-algo', '-p', dest='print_algo',
                        action='store_true')
    parser.add_argument('--no-print-algo', '-q', dest='print_algo',
                        action='store_false')

    if ipython_mode:
        parser.add_argument('--local_namespace', action='store_true')

    args = parser.parse_args(remaining_argv)

    return vars(args)
120
121
122
def parse_cell_magic(line, cell):
    """Parse an IPython cell magic invocation and run its algorithm.

    Arguments:
        * line : str
            Text following the magic name; whitespace-separated
            command line options as understood by parse_args().
        * cell : str
            Body of the cell, passed to run_pipeline() as algo_text.

    Unless --local_namespace is given, the algorithm is executed
    inside the IPython user namespace.  If --output/-o is given, its
    value is used as the name of a variable in the IPython user
    namespace that receives the performance result; nothing is
    written to a file.
    """
    # split() without a separator collapses runs of whitespace and
    # returns [] for an empty line.  split(' ') would yield '' tokens,
    # which argparse rejects as unrecognized arguments.
    args_list = line.split()
    args = parse_args(args_list, ipython_mode=True)

    # Remove print_algo kwarg to overwrite below.
    args.pop('print_algo')

    local_namespace = args.pop('local_namespace', False)
    # By default, execute inside IPython namespace
    if not local_namespace:
        args['namespace'] = get_ipython().user_ns  # flake8: noqa

    # If we are running inside NB, do not output to file but create a
    # variable instead
    output_var_name = args.pop('output', None)

    perf = run_pipeline(print_algo=False, algo_text=cell, **args)

    if output_var_name is not None:
        get_ipython().user_ns[output_var_name] = perf  # flake8: noqa
144
145
146
def run_pipeline(print_algo=True, **kwargs):
    """Runs a full zipline pipeline given configuration keyword
    arguments.

    1. Load data (start and end dates can be provided as strings, as
    well as the source and symbols).

    2. Instantiate algorithm (supply either algo_text or algofile
    kwargs containing initialize() and handle_data() functions).
    algofile is also handed to TradingAlgorithm as algo_filename.
    NOTE(review): earlier documentation said algofile_analyze.py is
    looked up and appended; this function does not do that itself --
    presumably TradingAlgorithm does; confirm.

    3. Run algorithm (capital_base is converted with float()).

    4. Return performance dataframe.

    :Arguments:
        * print_algo : bool <default=True>
           Whether to print the algorithm to command line. Will use
           pygments syntax coloring if pygments is found.
        * kwargs :
           Required keys: symbols (comma-separated string),
           metadata_index, source, source_time_column, capital_base,
           and algofile unless algo_text is given.
           Optional keys: start, end, metadata_path, asset_metadata,
           algo_text, namespace, output.

    :Raises:
        * PipelineDateError if exactly one of start/end is given.
        * NoSourceError if source is None.
        * NotImplementedError if source is neither 'yahoo' nor an
          existing file or directory.

    """
    start, end = _parse_date_bounds(kwargs.get('start'), kwargs.get('end'))

    # Without explicit bounds the algorithm run is told to overwrite its
    # simulation parameters (start and end then come from the data).
    overwrite_sim_params = start is None

    # Tolerate spaces around the commas and stray empty entries,
    # e.g. 'AAPL, MSFT,'.
    symbols = [name.strip()
               for name in kwargs['symbols'].split(',')
               if name.strip()]

    asset_metadata = _load_asset_metadata(
        kwargs.get('asset_metadata', None),
        kwargs.get('metadata_path'),
        kwargs['metadata_index'],
    )

    source = _load_source(
        kwargs['source'],
        symbols,
        start,
        end,
        kwargs['source_time_column'],
    )

    algo_text = _read_algo_text(kwargs)

    if print_algo:
        _print_algo_text(algo_text)

    algo = zipline.TradingAlgorithm(script=algo_text,
                                    namespace=kwargs.get('namespace', {}),
                                    capital_base=float(kwargs['capital_base']),
                                    algo_filename=kwargs.get('algofile'),
                                    equities_metadata=asset_metadata,
                                    start=start,
                                    end=end)

    perf = algo.run(source, overwrite_sim_params=overwrite_sim_params)

    output_fname = kwargs.get('output', None)
    if output_fname is not None:
        perf.to_pickle(output_fname)

    return perf


def _parse_date_bounds(start, end):
    """Convert start/end to UTC timestamps; both or neither must be set."""
    # Compare against None because strings/timestamps may have been given
    if start is not None:
        start = pd.Timestamp(start, tz='UTC')
    if end is not None:
        end = pd.Timestamp(end, tz='UTC')

    # Fail out if only one bound is provided
    if (start is None) != (end is None):
        raise PipelineDateError(start=start, end=end)

    return start, end


def _load_asset_metadata(asset_metadata, metadata_path, index_col):
    """Return metadata read from the CSV at metadata_path, else the given one."""
    # A path that is not an existing file is ignored on purpose and the
    # metadata passed in by the caller (possibly None) is kept.
    if metadata_path is not None and os.path.isfile(metadata_path):
        return pd.read_csv(metadata_path, index_col=index_col)
    return asset_metadata


def _load_source(source_arg, symbols, start, end, time_column):
    """Load price data from 'yahoo', a file path, or a directory path."""
    if source_arg is None:
        raise NoSourceError()

    if source_arg == 'yahoo':
        return zipline.data.load_bars_from_yahoo(
            stocks=symbols, start=start, end=end)

    if os.path.isfile(source_arg):
        return zipline.data.load_prices_from_csv(
            filepath=source_arg,
            identifier_col=time_column
        )

    if os.path.isdir(source_arg):
        return zipline.data.load_prices_from_csv_folder(
            folderpath=source_arg,
            identifier_col=time_column
        )

    raise NotImplementedError(
        'Source %s not implemented.' % source_arg)


def _read_algo_text(kwargs):
    """Return the algorithm source from algo_text, else read algofile."""
    algo_text = kwargs.get('algo_text', None)
    if algo_text is None:
        # Expect algofile to be set
        with open(kwargs['algofile'], 'r') as fd:
            algo_text = fd.read()
    return algo_text


def _print_algo_text(algo_text):
    """Print the algorithm source, syntax-highlighted when pygments exists."""
    if PYGMENTS:
        highlight(algo_text, PythonLexer(), TerminalFormatter(),
                  outfile=sys.stdout)
    else:
        print_(algo_text)
254