1
|
|
|
"""
Tests for Factor terms.
"""
4
|
|
|
from itertools import product |
5
|
|
|
from nose_parameterized import parameterized |
6
|
|
|
|
7
|
|
|
from numpy import ( |
8
|
|
|
arange, |
9
|
|
|
array, |
10
|
|
|
datetime64, |
11
|
|
|
empty, |
12
|
|
|
eye, |
13
|
|
|
nan, |
14
|
|
|
ones, |
15
|
|
|
) |
16
|
|
|
from numpy.random import randn, seed |
17
|
|
|
|
18
|
|
|
from zipline.errors import UnknownRankMethod |
19
|
|
|
from zipline.lib.rank import masked_rankdata_2d |
20
|
|
|
from zipline.pipeline import Factor, Filter, TermGraph |
21
|
|
|
from zipline.pipeline.factors import RSI, Returns |
22
|
|
|
from zipline.utils.test_utils import check_allclose, check_arrays |
23
|
|
|
from zipline.utils.numpy_utils import datetime64ns_dtype, float64_dtype, np_NaT |
24
|
|
|
|
25
|
|
|
from .base import BasePipelineTestCase |
26
|
|
|
|
27
|
|
|
|
28
|
|
|
class F(Factor):
    """
    Bare-bones Factor used as the subject under test.

    It declares no inputs and a zero-length window; the tests in this module
    supply its values directly through ``initial_workspace``.
    """
    window_length = 0
    inputs = ()
    dtype = float64_dtype
32
|
|
|
|
33
|
|
|
|
34
|
|
|
class Mask(Filter):
    """
    Bare-bones Filter used as a rank mask.

    It declares no inputs and a zero-length window; the tests in this module
    supply its values directly through ``initial_workspace``.
    """
    window_length = 0
    inputs = ()
37
|
|
|
|
38
|
|
|
|
39
|
|
|
# Decorator that expands a test method into one case per factor dtype.
# Each case receives ``(name, factor_dtype)`` as positional arguments.
for_each_factor_dtype = parameterized.expand(
    [
        ('datetime64[ns]', datetime64ns_dtype),
        ('float', float64_dtype),
    ],
)
43
|
|
|
|
44
|
|
|
|
45
|
|
|
class FactorTestCase(BasePipelineTestCase):
    """
    Tests for ``Factor.rank``, the ``RSI`` and ``Returns`` factors, and the
    ``masked_rankdata_2d`` helper.
    """

    def setUp(self):
        super(FactorTestCase, self).setUp()
        self.f = F()

    def test_bad_input(self):
        """Ranking with an unrecognized method raises UnknownRankMethod."""
        with self.assertRaises(UnknownRankMethod):
            self.f.rank("not a real rank method")

    @for_each_factor_dtype
    def test_rank_ascending(self, name, factor_dtype):
        """
        Check every rank method in ascending order, both when ``ascending``
        is passed explicitly and when it is left at its default.

        ``name`` is not used in the body; it is supplied by
        ``for_each_factor_dtype`` alongside ``factor_dtype``.
        """
        f = F(dtype=factor_dtype)

        # Generated with:
        # data = arange(25).reshape(5, 5).transpose() % 4
        data = array([[0, 1, 2, 3, 0],
                      [1, 2, 3, 0, 1],
                      [2, 3, 0, 1, 2],
                      [3, 0, 1, 2, 3],
                      [0, 1, 2, 3, 0]], dtype=factor_dtype)

        expected_ranks = {
            'ordinal': array([[1., 3., 4., 5., 2.],
                              [2., 4., 5., 1., 3.],
                              [3., 5., 1., 2., 4.],
                              [4., 1., 2., 3., 5.],
                              [1., 3., 4., 5., 2.]]),
            'average': array([[1.5, 3., 4., 5., 1.5],
                              [2.5, 4., 5., 1., 2.5],
                              [3.5, 5., 1., 2., 3.5],
                              [4.5, 1., 2., 3., 4.5],
                              [1.5, 3., 4., 5., 1.5]]),
            'min': array([[1., 3., 4., 5., 1.],
                          [2., 4., 5., 1., 2.],
                          [3., 5., 1., 2., 3.],
                          [4., 1., 2., 3., 4.],
                          [1., 3., 4., 5., 1.]]),
            'max': array([[2., 3., 4., 5., 2.],
                          [3., 4., 5., 1., 3.],
                          [4., 5., 1., 2., 4.],
                          [5., 1., 2., 3., 5.],
                          [2., 3., 4., 5., 2.]]),
            'dense': array([[1., 2., 3., 4., 1.],
                            [2., 3., 4., 1., 2.],
                            [3., 4., 1., 2., 3.],
                            [4., 1., 2., 3., 4.],
                            [1., 2., 3., 4., 1.]]),
        }

        def check(terms):
            # ``terms`` maps a rank method name to the rank term to compute;
            # each result is compared with the expectation for that method.
            graph = TermGraph(terms)
            results = self.run_graph(
                graph,
                initial_workspace={f: data},
                mask=self.build_mask(ones((5, 5))),
            )
            for method in terms:
                check_arrays(results[method], expected_ranks[method])

        check({meth: f.rank(method=meth) for meth in expected_ranks})
        check({
            meth: f.rank(method=meth, ascending=True)
            for meth in expected_ranks
        })
        # Not passing a method should default to ordinal.
        check({'ordinal': f.rank()})
        check({'ordinal': f.rank(ascending=True)})

    @for_each_factor_dtype
    def test_rank_descending(self, name, factor_dtype):
        """
        Check every rank method with ``ascending=False``.

        ``name`` is not used in the body; it is supplied by
        ``for_each_factor_dtype`` alongside ``factor_dtype``.
        """
        f = F(dtype=factor_dtype)

        # Generated with:
        # data = arange(25).reshape(5, 5).transpose() % 4
        data = array([[0, 1, 2, 3, 0],
                      [1, 2, 3, 0, 1],
                      [2, 3, 0, 1, 2],
                      [3, 0, 1, 2, 3],
                      [0, 1, 2, 3, 0]], dtype=factor_dtype)
        expected_ranks = {
            'ordinal': array([[4., 3., 2., 1., 5.],
                              [3., 2., 1., 5., 4.],
                              [2., 1., 5., 4., 3.],
                              [1., 5., 4., 3., 2.],
                              [4., 3., 2., 1., 5.]]),
            'average': array([[4.5, 3., 2., 1., 4.5],
                              [3.5, 2., 1., 5., 3.5],
                              [2.5, 1., 5., 4., 2.5],
                              [1.5, 5., 4., 3., 1.5],
                              [4.5, 3., 2., 1., 4.5]]),
            'min': array([[4., 3., 2., 1., 4.],
                          [3., 2., 1., 5., 3.],
                          [2., 1., 5., 4., 2.],
                          [1., 5., 4., 3., 1.],
                          [4., 3., 2., 1., 4.]]),
            'max': array([[5., 3., 2., 1., 5.],
                          [4., 2., 1., 5., 4.],
                          [3., 1., 5., 4., 3.],
                          [2., 5., 4., 3., 2.],
                          [5., 3., 2., 1., 5.]]),
            'dense': array([[4., 3., 2., 1., 4.],
                            [3., 2., 1., 4., 3.],
                            [2., 1., 4., 3., 2.],
                            [1., 4., 3., 2., 1.],
                            [4., 3., 2., 1., 4.]]),
        }

        def check(terms):
            # ``terms`` maps a rank method name to the rank term to compute;
            # each result is compared with the expectation for that method.
            graph = TermGraph(terms)
            results = self.run_graph(
                graph,
                initial_workspace={f: data},
                mask=self.build_mask(ones((5, 5))),
            )
            for method in terms:
                check_arrays(results[method], expected_ranks[method])

        check({
            meth: f.rank(method=meth, ascending=False)
            for meth in expected_ranks
        })
        # Not passing a method should default to ordinal.
        check({'ordinal': f.rank(ascending=False)})

    @for_each_factor_dtype
    def test_rank_after_mask(self, name, factor_dtype):
        """
        Check ordinal ranks with and without a ``Mask`` filter that excludes
        the diagonal, in both ascending and descending order.

        ``name`` is not used in the body; it is supplied by
        ``for_each_factor_dtype`` alongside ``factor_dtype``.
        """
        f = F(dtype=factor_dtype)
        # data = arange(25).reshape(5, 5).transpose() % 4
        data = array([[0, 1, 2, 3, 0],
                      [1, 2, 3, 0, 1],
                      [2, 3, 0, 1, 2],
                      [3, 0, 1, 2, 3],
                      [0, 1, 2, 3, 0]], dtype=factor_dtype)
        # True everywhere except on the diagonal.
        mask_data = ~eye(5, dtype=bool)
        initial_workspace = {f: data, Mask(): mask_data}

        graph = TermGraph(
            {
                "ascending_nomask": f.rank(ascending=True),
                "ascending_mask": f.rank(ascending=True, mask=Mask()),
                "descending_nomask": f.rank(ascending=False),
                "descending_mask": f.rank(ascending=False, mask=Mask()),
            }
        )

        expected = {
            "ascending_nomask": array([[1., 3., 4., 5., 2.],
                                       [2., 4., 5., 1., 3.],
                                       [3., 5., 1., 2., 4.],
                                       [4., 1., 2., 3., 5.],
                                       [1., 3., 4., 5., 2.]]),
            "descending_nomask": array([[4., 3., 2., 1., 5.],
                                        [3., 2., 1., 5., 4.],
                                        [2., 1., 5., 4., 3.],
                                        [1., 5., 4., 3., 2.],
                                        [4., 3., 2., 1., 5.]]),
            # Diagonal should be all nans, and anything whose rank was
            # greater than the diagonal's in the unmasked calc should go
            # down by 1.
            "ascending_mask": array([[nan, 2., 3., 4., 1.],
                                     [2., nan, 4., 1., 3.],
                                     [2., 4., nan, 1., 3.],
                                     [3., 1., 2., nan, 4.],
                                     [1., 2., 3., 4., nan]]),
            "descending_mask": array([[nan, 3., 2., 1., 4.],
                                      [2., nan, 1., 4., 3.],
                                      [2., 1., nan, 4., 3.],
                                      [1., 4., 3., nan, 2.],
                                      [4., 3., 2., 1., nan]]),
        }

        results = self.run_graph(
            graph,
            initial_workspace,
            mask=self.build_mask(ones((5, 5))),
        )
        for method in results:
            check_arrays(expected[method], results[method])

    @parameterized.expand([
        # Test cases computed by doing:
        # from numpy.random import seed, randn
        # from talib import RSI
        # seed(seed_value)
        # data = abs(randn(15, 3))
        # expected = [RSI(data[:, i])[-1] for i in range(3)]
        (100, array([41.032913785966, 51.553585468393, 51.022005016446])),
        (101, array([43.506969935466, 46.145367530182, 50.57407044197])),
        (102, array([46.610102205934, 47.646892444315, 52.13182788538])),
    ])
    def test_rsi(self, seed_value, expected):
        """
        Compare ``RSI.compute`` on seeded random data with precomputed
        reference values.
        """
        rsi = RSI()

        today = datetime64(1, 'ns')
        assets = arange(3)

        seed(seed_value)  # Seed so we get deterministic results.
        test_data = abs(randn(15, 3))

        # Output buffer that ``compute`` fills in place.
        out = empty((3,), dtype=float)
        rsi.compute(today, assets, out, test_data)

        check_allclose(expected, out)

    @parameterized.expand([
        (100, 15),
        (101, 4),
        (102, 100),
    ])
    def test_returns(self, seed_value, window_length):
        """
        Compare ``Returns.compute`` on seeded random data with the percent
        change between the first and last rows of the window.
        """
        returns = Returns(window_length=window_length)

        today = datetime64(1, 'ns')
        assets = arange(3)

        seed(seed_value)  # Seed so we get deterministic results.
        test_data = abs(randn(window_length, 3))

        # Calculate the expected returns
        expected = (test_data[-1] - test_data[0]) / test_data[0]

        # Output buffer that ``compute`` fills in place.
        out = empty((3,), dtype=float)
        returns.compute(today, assets, out, test_data)

        check_allclose(expected, out)

    def gen_ranking_cases():
        # Called at class-definition time (no ``self``) to build the argument
        # tuples for ``test_masked_rankdata_2d``: the cartesian product of
        # seeds, rank methods, and the three boolean switches.
        seeds = range(int(1e4), int(1e5), int(1e4))
        methods = ('ordinal', 'average')
        use_mask_values = (True, False)
        set_missing_values = (True, False)
        ascending_values = (True, False)
        return product(
            seeds,
            methods,
            use_mask_values,
            set_missing_values,
            ascending_values,
        )

    @parameterized.expand(gen_ranking_cases())
    def test_masked_rankdata_2d(self,
                                seed_value,
                                method,
                                use_mask,
                                set_missing,
                                ascending):
        """
        Check that ``masked_rankdata_2d`` produces the same ranks for float
        data and for the same bytes viewed as ``datetime64[ns]``.
        """
        eyemask = ~eye(5, dtype=bool)
        nomask = ones((5, 5), dtype=bool)

        seed(seed_value)
        asfloat = (randn(5, 5) * seed_value)
        # Same underlying bytes, reinterpreted as datetimes.
        asdatetime = (asfloat).copy().view('datetime64[ns]')

        mask = eyemask if use_mask else nomask
        if set_missing:
            # Blank out one column using each dtype's missing value.
            asfloat[:, 2] = nan
            asdatetime[:, 2] = np_NaT

        # Forward the parameterized ``ascending`` flag so that both sort
        # directions are actually exercised.
        float_result = masked_rankdata_2d(
            data=asfloat,
            mask=mask,
            missing_value=nan,
            method=method,
            ascending=ascending,
        )
        datetime_result = masked_rankdata_2d(
            data=asdatetime,
            mask=mask,
            missing_value=np_NaT,
            method=method,
            ascending=ascending,
        )

        check_arrays(float_result, datetime_result)
327
|
|
|
|