1
|
|
|
""" |
2
|
|
|
factor.py |
3
|
|
|
""" |
4
|
|
|
from operator import attrgetter |
5
|
|
|
from numbers import Number |
6
|
|
|
|
7
|
|
|
from numpy import ( |
8
|
|
|
apply_along_axis, |
9
|
|
|
float64, |
10
|
|
|
nan, |
11
|
|
|
inf, |
12
|
|
|
) |
13
|
|
|
from scipy.stats import rankdata |
14
|
|
|
|
15
|
|
|
from zipline.errors import ( |
16
|
|
|
UnknownRankMethod, |
17
|
|
|
UnsupportedDataType, |
18
|
|
|
) |
19
|
|
|
from zipline.lib.rank import rankdata_2d_ordinal |
20
|
|
|
from zipline.pipeline.term import ( |
21
|
|
|
CustomTermMixin, |
22
|
|
|
NotSpecified, |
23
|
|
|
RequiredWindowLengthMixin, |
24
|
|
|
SingleInputMixin, |
25
|
|
|
CompositeTerm, |
26
|
|
|
) |
27
|
|
|
from zipline.pipeline.expression import ( |
28
|
|
|
BadBinaryOperator, |
29
|
|
|
COMPARISONS, |
30
|
|
|
is_comparison, |
31
|
|
|
MATH_BINOPS, |
32
|
|
|
method_name_for_op, |
33
|
|
|
NumericalExpression, |
34
|
|
|
NUMEXPR_MATH_FUNCS, |
35
|
|
|
UNARY_OPS, |
36
|
|
|
) |
37
|
|
|
from zipline.pipeline.filters import ( |
38
|
|
|
NumExprFilter, |
39
|
|
|
PercentileFilter, |
40
|
|
|
) |
41
|
|
|
from zipline.utils.control_flow import nullctx |
42
|
|
|
|
43
|
|
|
|
44
|
|
|
_RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal']) |
45
|
|
|
|
46
|
|
|
|
47
|
|
|
def binop_return_type(op):
    """
    Select the expression class produced by the binary operator `op`.

    Comparison operators produce a NumExprFilter; every other operator
    produces a NumExprFactor.
    """
    return NumExprFilter if is_comparison(op) else NumExprFactor
52
|
|
|
|
53
|
|
|
|
54
|
|
|
def binary_operator(op):
    """
    Build a method implementing the binary operator `op` for Factor classes.

    Parameters
    ----------
    op : str
        The operator symbol to implement.

    Returns
    -------
    binary_operator : function
        A method taking ``(self, other)``, suitable for use as an operator
        method such as ``__add__``.
    """
    # Resolved once per operator.  When the right-hand operand is a
    # NumExprFactor we hand the work to its reflected (commuted) method, since
    # NumericalExpression knows how to merge inputs correctly.
    get_reflected_method = attrgetter(method_name_for_op(op, commute=True))

    def binary_operator(self, other):
        # Looked up at call time rather than in the enclosing scope: the
        # classes returned by binop_return_type are not defined yet when this
        # factory is invoked in the class body of Factor.
        result_type = binop_return_type(op)

        if isinstance(self, NumExprFactor):
            lhs, rhs, merged_inputs = self.build_binary_op(op, other)
            expr = "({left}) {op} ({right})".format(left=lhs, op=op, right=rhs)
            return result_type(expr, merged_inputs)

        if isinstance(other, NumExprFactor):
            return get_reflected_method(other)(self)

        if isinstance(other, Factor):
            # A factor combined with itself needs only a single input slot.
            if self is other:
                return result_type("x_0 {op} x_0".format(op=op), (self,))
            return result_type("x_0 {op} x_1".format(op=op), (self, other))

        if isinstance(other, Number):
            expr = "x_0 {op} ({constant})".format(op=op, constant=other)
            return result_type(expr, binds=(self,))

        raise BadBinaryOperator(op, self, other)

    binary_operator.__doc__ = "Binary Operator: '%s'" % op
    return binary_operator
107
|
|
|
|
108
|
|
|
|
109
|
|
|
def reflected_binary_operator(op):
    """
    Build a method implementing the reflected form of the binary operator
    `op` for a Factor.

    Parameters
    ----------
    op : str
        The operator symbol to implement.  Must not be a comparison.

    Returns
    -------
    reflected_binary_operator : function
        A method taking ``(self, other)``, suitable for use as a reflected
        operator method such as ``__radd__``.
    """
    assert not is_comparison(op)

    def reflected_binary_operator(self, other):
        if isinstance(self, NumericalExpression):
            own_expr, their_expr, merged_inputs = self.build_binary_op(
                op, other
            )
            # Reflected call: `other` is the left operand of the expression.
            expr = "({left}) {op} ({right})".format(
                left=their_expr,
                right=own_expr,
                op=op,
            )
            return NumExprFactor(expr, merged_inputs)

        # Numbers are the only remaining case to handle here: for every other
        # valid operand the corresponding left-binding method is called
        # instead of this one.
        if isinstance(other, Number):
            expr = "{constant} {op} x_0".format(op=op, constant=other)
            return NumExprFactor(expr, binds=(self,))

        raise BadBinaryOperator(op, other, self)

    return reflected_binary_operator
142
|
|
|
|
143
|
|
|
|
144
|
|
|
def unary_operator(op):
    """
    Build a method implementing the unary operator `op` for Factors.

    Parameters
    ----------
    op : str
        The operator symbol to implement.  Only '-' is accepted.

    Returns
    -------
    unary_operator : function
        A method taking ``self`` and returning a NumExprFactor.

    Raises
    ------
    ValueError
        If `op` is not a supported unary operator.
    """
    # Negation is currently the only unary operation supported for all of
    # our possible input types.
    if op not in {'-'}:
        raise ValueError("Invalid unary operator %s." % op)

    def unary_operator(self):
        # A plain term becomes the sole input of a new expression.
        if not isinstance(self, NumericalExpression):
            return NumExprFactor("{op}x_0".format(op=op), (self,))

        # An existing expression is wrapped in place, reusing its inputs.
        wrapped = "{op}({expr})".format(op=op, expr=self._expr)
        return NumExprFactor(wrapped, self.inputs)

    unary_operator.__doc__ = "Unary Operator: '%s'" % op
    return unary_operator
167
|
|
|
|
168
|
|
|
|
169
|
|
|
def function_application(func):
    """
    Build a method that applies the numexpr math function `func` to a Factor.

    Parameters
    ----------
    func : str
        Name of the function to apply.  Must be a member of
        NUMEXPR_MATH_FUNCS.

    Returns
    -------
    mathfunc : function
        A method taking ``self`` and returning a NumExprFactor.

    Raises
    ------
    ValueError
        If `func` is not in NUMEXPR_MATH_FUNCS.
    """
    if func not in NUMEXPR_MATH_FUNCS:
        raise ValueError("Unsupported mathematical function '%s'" % func)

    def mathfunc(self):
        # A plain term becomes the sole input of a new expression.
        if not isinstance(self, NumericalExpression):
            return NumExprFactor("{func}(x_0)".format(func=func), (self,))

        # An existing expression is wrapped in place, reusing its inputs.
        wrapped = "{func}({expr})".format(func=func, expr=self._expr)
        return NumExprFactor(wrapped, self.inputs)

    return mathfunc
186
|
|
|
|
187
|
|
|
|
188
|
|
|
class Factor(CompositeTerm):
    """
    A Pipeline API term that produces numerically-valued outputs.

    Arithmetic operators, comparison operators (other than ``==``, which is
    exposed as the `eq` method), unary negation and the numexpr math
    functions are attached to this class dynamically in the class body.
    """
    dtype = float64

    # Install the methods that create NumExprFactor/NumExprFilter instances
    # by writing them directly into the class namespace.
    clsdict = locals()
    clsdict.update(
        {
            method_name_for_op(op): binary_operator(op)
            # __eq__ is deliberately not overridden, because doing so breaks
            # comparisons on tuples of Factors.
            for op in MATH_BINOPS.union(COMPARISONS - {'=='})
        }
    )
    clsdict.update(
        {
            method_name_for_op(op, commute=True): reflected_binary_operator(op)
            for op in MATH_BINOPS
        }
    )
    clsdict.update(
        {
            '__neg__': unary_operator(op)
            for op in UNARY_OPS
        }
    )
    clsdict.update(
        {
            funcname: function_application(funcname)
            for funcname in NUMEXPR_MATH_FUNCS
        }
    )

    # True division reuses the division implementations installed above.
    __truediv__ = clsdict['__div__']
    __rtruediv__ = clsdict['__rdiv__']

    # Element-wise equality, offered as a named method since __eq__ is not
    # overridden.
    eq = binary_operator('==')

    def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
        """
        Build a Factor holding the sorted rank of each column within each
        row of this Factor's output.

        Parameters
        ----------
        method : str, {'ordinal', 'min', 'max', 'dense', 'average'}
            How ranks are assigned to tied elements.  See
            `scipy.stats.rankdata` for the full semantics of each method.
            Default is 'ordinal'.
        ascending : bool, optional
            Whether ranks are computed in ascending or descending order.
            Default is True.
        mask : zipline.pipeline.Filter, optional
            A Filter selecting the assets to consider when ranking.  If
            supplied, ranks are computed ignoring any asset/date pair for
            which `mask` produces False.

        Returns
        -------
        ranks : zipline.pipeline.factors.Rank
            A new factor computing the ranking of the data produced by
            `self`.

        Notes
        -----
        The default `method` here differs from the default of
        `scipy.stats.rankdata`.  See that function's documentation for a
        full description of the valid values of `method`.

        Missing or non-existent data on a given day will cause an asset to
        be given a rank of NaN for that day.

        See Also
        --------
        scipy.stats.rankdata
        zipline.lib.rank
        zipline.pipeline.factors.Rank
        """
        # A descending rank is an ascending rank of the negated factor.
        ranked_term = self if ascending else -self
        return Rank(ranked_term, method=method, mask=mask)

    def top(self, N, mask=NotSpecified):
        """
        Build a Filter matching the N highest-valued assets of this Factor
        each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter selecting the assets to consider when ranking.  If
            supplied, top values are computed ignoring any asset/date pair
            for which `mask` produces False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        descending_ranks = self.rank(ascending=False, mask=mask)
        return descending_ranks <= N

    def bottom(self, N, mask=NotSpecified):
        """
        Build a Filter matching the N lowest-valued assets of this Factor
        each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter selecting the assets to consider when ranking.  If
            supplied, bottom values are computed ignoring any asset/date
            pair for which `mask` produces False.

        Returns
        -------
        filter : zipline.pipeline.Filter
        """
        ascending_ranks = self.rank(ascending=True, mask=mask)
        return ascending_ranks <= N

    def percentile_between(self,
                           min_percentile,
                           max_percentile,
                           mask=NotSpecified):
        """
        Build a Filter selecting the entries of this Factor's output that
        fall within the percentile range bounded by `min_percentile` and
        `max_percentile`.

        Parameters
        ----------
        min_percentile : float [0.0, 100.0]
            Return True for assets falling above this percentile in the
            data.
        max_percentile : float [0.0, 100.0]
            Return True for assets falling below this percentile in the
            data.
        mask : zipline.pipeline.Filter, optional
            A Filter selecting the assets to consider when computing
            percentile thresholds.  If supplied, percentile cutoffs are
            computed each day using only assets for which `mask` returns
            True, and assets not passing `mask` will produce False in the
            output of this filter as well.

        Returns
        -------
        out : zipline.pipeline.filters.PercentileFilter
            A new filter computing the specified percentile-range mask.

        See Also
        --------
        zipline.pipeline.filters.PercentileFilter
        """
        return PercentileFilter(
            self,
            min_percentile=min_percentile,
            max_percentile=max_percentile,
            mask=mask,
        )

    def isnan(self):
        """
        Build a Filter that is True wherever this Factor is NaN.
        """
        # NaN is the only value that compares unequal to itself.
        return self != self

    def notnan(self):
        """
        Build a Filter that is True wherever this Factor is not NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        nan_filter = self.isnan()
        return ~nan_filter

    def isfinite(self):
        """
        Build a Filter that is True wherever this Factor is anything other
        than NaN, inf, or -inf.
        """
        above_negative_inf = -inf < self
        below_positive_inf = self < inf
        return above_negative_inf & below_positive_inf
369
|
|
|
|
370
|
|
|
|
371
|
|
|
class NumExprFactor(NumericalExpression, Factor):
    """
    A Factor whose values are computed from a numexpr expression.

    Parameters
    ----------
    expr : string
        A string suitable for passing to numexpr.  Every variable in `expr`
        should have the form "x_i", where i is the index of the
        corresponding factor input in `binds`.
    binds : tuple
        A tuple of factors to use as inputs.

    Notes
    -----
    NumExprFactors are created by numerical operators such as `+` and `-`.
    Users should rarely need to construct a NumExprFactor directly.
    """
390
|
|
|
|
391
|
|
|
|
392
|
|
|
class Rank(SingleInputMixin, Factor):
    """
    A Factor representing the row-wise rank data of another Factor.

    Parameters
    ----------
    factor : zipline.pipeline.factors.Factor
        The factor on which to compute ranks.
    method : str, {'average', 'min', 'max', 'dense', 'ordinal'}
        The method used to assign ranks to tied elements.  See
        `scipy.stats.rankdata` for a full description of the semantics for
        each ranking method.
    mask : zipline.pipeline.Filter
        Filter forwarded to the term constructor; entries where it is False
        are excluded from the ranking and receive NaN.

    See Also
    --------
    scipy.stats.rankdata : Underlying ranking algorithm.
    zipline.pipeline.factors.Factor.rank : Method-style interface to the
        same functionality.

    Notes
    -----
    Most users should call Factor.rank rather than directly construct an
    instance of this class.
    """
    window_length = 0
    dtype = float64

    def __new__(cls, factor, method, mask):
        return super(Rank, cls).__new__(
            cls,
            inputs=(factor,),
            method=method,
            mask=mask,
        )

    def _init(self, method, *args, **kwargs):
        # Remember the ranking method, then let the parent classes consume
        # the remaining arguments.
        self._method = method
        return super(Rank, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, method, *args, **kwargs):
        # The ranking method is part of the identity, so ranks of the same
        # input computed with different methods are distinct.
        return (
            super(Rank, cls).static_identity(*args, **kwargs),
            method,
        )

    def _validate(self):
        """
        Verify that the stored rank method is valid.

        Raises
        ------
        UnknownRankMethod
            If the method is not one of the names in _RANK_METHODS.
        """
        if self._method not in _RANK_METHODS:
            raise UnknownRankMethod(
                method=self._method,
                choices=set(_RANK_METHODS),
            )
        return super(Rank, self)._validate()

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a like-shaped array of per-row
        ranks.

        Entries where `mask` is False are set to NaN both before ranking and
        in the returned array.
        """
        inv_mask = ~mask
        # Work on a copy so the caller's input array is not modified.
        data = arrays[0].copy()
        data[inv_mask] = nan
        # OPTIMIZATION: Fast path the default case with our own specialized
        # Cython implementation.
        if self._method == 'ordinal':
            result = rankdata_2d_ordinal(data)
        else:
            # FUTURE OPTIMIZATION:
            # Write a less general "apply to rows" method that doesn't do all
            # the extra work that apply_along_axis does.
            result = apply_along_axis(rankdata, 1, data, method=self._method)
            # rankdata can return integer ranks for methods other than
            # 'average'.  An integer array cannot hold NaN, so convert to
            # float64 before NaNs are written back below.
            if result.dtype != float64:
                result = result.astype(float64)

        # rankdata will sort nan values into last place, but we want our
        # nans to propagate, so explicitly re-apply.
        result[inv_mask] = nan
        return result

    def __repr__(self):
        return "{type}({input_}, method='{method}', mask={mask})".format(
            type=type(self).__name__,
            input_=self.inputs[0],
            method=self._method,
            mask=self.mask,
        )
478
|
|
|
|
479
|
|
|
|
480
|
|
|
class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    '''
    Base class for user-defined Factors.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this argument is not
        passed to the CustomFactor constructor, we look for a class-level
        attribute named `window_length`.

    Notes
    -----
    Users implementing their own Factors should subclass CustomFactor and
    implement a method named `compute` with the following signature:

    .. code-block:: python

       def compute(self, today, assets, out, *inputs):
          ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFactor constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[float64, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    ``compute`` functions should expect to be passed NaN values for dates on
    which no data was available for an asset.  This may include dates on which
    an asset did not yet exist.

    For example, if a CustomFactor requires 10 rows of close price data, and
    asset A started trading on Monday June 2nd, 2014, then on Tuesday, June
    3rd, 2014, the column of input data for asset A will have 9 leading NaNs
    for the preceding days on which data was not yet available.

    Examples
    --------

    A CustomFactor with pre-declared defaults:

    .. code-block:: python

        class TenDayRange(CustomFactor):
            """
            Computes the difference between the highest high in the last 10
            days and the lowest low.

            Pre-declares high and low as default inputs and `window_length` as
            10.
            """

            inputs = [USEquityPricing.high, USEquityPricing.low]
            window_length = 10

            def compute(self, today, assets, out, highs, lows):
                from numpy import nanmin, nanmax

                highest_highs = nanmax(highs, axis=0)
                lowest_lows = nanmin(lows, axis=0)
                out[:] = highest_highs - lowest_lows


        # Doesn't require passing inputs or window_length because they're
        # pre-declared as defaults for the TenDayRange class.
        ten_day_range = TenDayRange()

    A CustomFactor without defaults:

    .. code-block:: python

        class MedianValue(CustomFactor):
            """
            Computes the median value of an arbitrary single input over an
            arbitrary window.

            Does not declare any defaults, so values for `window_length` and
            `inputs` must be passed explicitly on every construction.
            """

            def compute(self, today, assets, out, data):
                from numpy import nanmedian
                out[:] = nanmedian(data, axis=0)

        # Values for `inputs` and `window_length` must be passed explicitly to
        # MedianValue.
        median_close10 = MedianValue([USEquityPricing.close], window_length=10)
        median_low15 = MedianValue([USEquityPricing.low], window_length=15)
    '''
    # Class-level context object created by nullctx().
    ctx = nullctx()

    def _validate(self):
        """
        Check that this factor's dtype is float64, then run the parent
        validation.

        Raises
        ------
        UnsupportedDataType
            If `self.dtype` is not float64.
        """
        if self.dtype != float64:
            raise UnsupportedDataType(dtype=self.dtype)
        return super(CustomFactor, self)._validate()
590
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.