zipline.pipeline.CustomTermMixin - Code Metrics - Inspection of "Interactive conveniences" - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#836)

unknown

created 2015-11-21 02:12 UTC

zipline.pipeline.CustomTermMixin A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	53
Duplicated Lines	0 %

Metric	Value
dl	0
loc	53
rs	10
wmc	8

5 Methods

Rating	Name	Size	Complexity
A	__new__()	6	1
A	_compute()	20	4
A	__init__()	4	1
A	compute()	5	1
A	short_repr()	2	1

"""
Base class for Filters, Factors and Classifiers
"""
from abc import ABCMeta, abstractproperty
from weakref import WeakValueDictionary

from numpy import bool_, full, nan
from six import with_metaclass

from zipline.errors import (
    DTypeNotSpecified,
    InputTermNotAtomic,
    TermInputsNotSpecified,
    WindowLengthNotPositive,
    WindowLengthNotSpecified,
)
from zipline.utils.memoize import lazyval
from zipline.utils.sentinel import sentinel


# Module-level sentinel used as the "no value supplied" default for Term
# constructor arguments and class-level attributes.  Code in this module
# tests for it by identity (``x is NotSpecified``).
NotSpecified = sentinel(
    'NotSpecified',
    'Singleton sentinel value used for Term defaults.',
)


class Term(with_metaclass(ABCMeta, object)):
    """
    Abstract root of every node in a Pipeline API compute graph.

    Construction is memoized: building a Term whose ``static_identity``
    matches an entry already present in ``_term_cache`` returns that existing
    object instead of a new one.
    """
    # NotSpecified marks "unset"; subclasses are required to supply real
    # values for these.
    dtype = NotSpecified
    domain = NotSpecified

    # Maps identity -> instance.  Values are held weakly, so an entry
    # disappears once nothing else references the Term.
    _term_cache = WeakValueDictionary()

    def __new__(cls,
                domain=NotSpecified,
                dtype=NotSpecified,
                *args,
                **kwargs):
        """
        Memoizing constructor.

        Reusing previously-built Terms lets equivalent sub-expressions be
        computed only once when a Pipeline dependency graph is traversed.
        Doing so is **sane** because terms and their inputs are both
        conceptually immutable.

        Arguments left as NotSpecified fall back to the class-level
        attribute of the same name.
        """
        domain = cls.domain if domain is NotSpecified else domain
        dtype = cls.dtype if dtype is NotSpecified else dtype

        key = cls.static_identity(
            *args,
            domain=domain,
            dtype=dtype,
            **kwargs
        )

        try:
            return cls._term_cache[key]
        except KeyError:
            # Cache miss: allocate, run the one-time initializer, remember
            # the result under its identity.
            blank = super(Term, cls).__new__(cls)
            built = blank._init(
                *args,
                domain=domain,
                dtype=dtype,
                **kwargs
            )
            cls._term_cache[key] = built
            return built

    def __init__(self, *args, **kwargs):
        """
        Deliberate no-op; real initialization lives in ``_init``.

        Python calls __init__ whenever __new__ returns an instance of the
        class, including when that instance came out of the memoization
        cache and was already initialized.  Keeping this method empty avoids
        re-initializing cached objects.

        Subclasses that need to set up new instances should override
        ``_init``, which __new__ calls only when it builds a fresh object.
        """

    def _init(self, domain, dtype):
        """
        One-time initializer: store `domain` and `dtype`, validate, and
        return ``self`` so that __new__ can cache the result directly.
        """
        self.domain, self.dtype = domain, dtype
        self._validate()
        return self

    @classmethod
    def static_identity(cls, domain, dtype):
        """
        Compute the cache key for the Term these arguments would produce.

        Two constructions whose identities compare equal yield the same
        cached instance, which mainly serves to simplify dependency
        resolution.

        Implemented as a classmethod so that Term.__new__ can call it before
        deciding whether a new instance is needed.
        """
        return (cls, domain, dtype)

    def _validate(self):
        """
        Check that this term is well-formed.  Meant to run exactly once, as
        the last step of Term._init().

        Raises DTypeNotSpecified when no dtype was provided.
        """
        if self.dtype is not NotSpecified:
            return
        raise DTypeNotSpecified(termname=type(self).__name__)

    @abstractproperty
    def inputs(self):
        """
        Tuple of the other Terms this Term needs in order to be computed.
        """
        raise NotImplementedError()

    @abstractproperty
    def mask(self):
        """
        2D Filter selecting the asset/date pairs to include when computing
        this Term (True: include, False: exclude).
        """
        raise NotImplementedError()

    @lazyval
    def dependencies(self):
        """
        All of `inputs` followed by `mask`, as a single tuple.
        """
        upstream = self.inputs
        return upstream + (self.mask,)

    @lazyval
    def atomic(self):
        """
        True unless some dependency other than AssetExists() is truthy.
        """
        # NOTE(review): a dependency only counts against atomicity when it
        # is truthy; this mirrors the long-standing behaviour and is kept
        # as-is here.
        for dep in self.dependencies:
            if dep is AssetExists():
                continue
            if dep:
                return False
        return True


class AssetExists(Term):
    """
    Boolean pseudo-filter: did a given asset exist on a given day?

    Terms that are not handed an explicit mask use this one by default.

    It is morally a Filter, since it yields one boolean per asset per date,
    but it does not subclass Filter because `AssetExists` is computed
    directly by the PipelineEngine.

    See Also
    --------
    zipline.assets.AssetFinder.lifetimes
    """
    dtype = bool_

    # This term has no inputs, no dependencies and no mask of its own.
    inputs = ()
    dependencies = ()
    mask = None

    dataset = None
    extra_input_rows = 0

    def __repr__(self):
        return "AssetExists()"


# TODO: Move mixins to a separate file?
class SingleInputMixin(object):
    """
    Validation mixin for terms that must have exactly one input.
    """

    def _validate(self):
        """
        Raise ValueError unless ``len(self.inputs) == 1``; otherwise defer
        to the next ``_validate`` in the MRO and return its result.
        """
        count = len(self.inputs)
        if count == 1:
            return super(SingleInputMixin, self)._validate()

        template = (
            "{typename} expects only one input, "
            "but received {num_inputs} instead."
        )
        raise ValueError(
            template.format(typename=type(self).__name__, num_inputs=count)
        )


class RequiredWindowLengthMixin(object):
    """
    Validation mixin for terms that must be windowed.
    """

    def _validate(self):
        """
        Raise WindowLengthNotPositive when ``self.windowed`` is falsey;
        otherwise defer to the next ``_validate`` in the MRO and return its
        result.
        """
        if self.windowed:
            return super(RequiredWindowLengthMixin, self)._validate()
        raise WindowLengthNotPositive(window_length=self.window_length)


class CustomTermMixin(object):
    """
    Mixin that drives a user-supplied `compute` over rolling input windows.

    `_compute` is provided here and calls `compute` once per date;
    subclasses are expected to override `compute`.

    Used by CustomFactor, CustomFilter, CustomClassifier, etc.
    """

    def __new__(cls, inputs=NotSpecified, window_length=NotSpecified):
        # Accept only `inputs` and `window_length`, forwarding both by
        # keyword to the next __new__ in the MRO.
        parent = super(CustomTermMixin, cls)
        return parent.__new__(cls, inputs=inputs, window_length=window_length)

    def __init__(self, inputs=NotSpecified, window_length=NotSpecified):
        # Same signature as __new__; forwards to the next __init__ in the
        # MRO.
        parent = super(CustomTermMixin, self)
        return parent.__init__(inputs=inputs, window_length=window_length)

    def compute(self, today, assets, out, *arrays):
        """
        Hook for subclasses: write this date's results into `out`.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def _compute(self, windows, dates, assets, mask):
        """
        Build the output array by invoking `compute` once per date.

        The output has the shape of `mask` and starts out filled with nan.
        For each position ``i`` in `dates`, `compute` receives ``dates[i]``,
        `assets`, the row ``out[i]``, and the next item pulled from every
        iterator in `windows`.  Entries where `mask` is False are reset to
        nan before the array is returned.
        """
        # TODO: Make mask available to user's `compute`.
        user_compute = self.compute
        result = full(mask.shape, nan, dtype=self.dtype)
        with self.ctx:
            # TODO: Consider pre-filtering columns that are all-nan at each
            # time-step?
            for row, day in enumerate(dates):
                user_compute(
                    day,
                    assets,
                    result[row],
                    *(next(window) for window in windows)
                )
        result[~mask] = nan
        return result

    def short_repr(self):
        """
        Compact representation: the class name followed by the window
        length in parentheses.
        """
        name = type(self).__name__
        return name + '(%d)' % self.window_length


class CompositeTerm(Term):
    """
    Term carrying explicit `inputs`, a `window_length` and a `mask`.

    Each of the three may be supplied to the constructor or provided as a
    class-level default.
    """
    inputs = NotSpecified
    window_length = NotSpecified
    mask = NotSpecified

    def __new__(cls, inputs=NotSpecified, window_length=NotSpecified,
                mask=NotSpecified, *args, **kwargs):
        """
        Resolve class-level defaults, normalize `inputs` to a tuple, and
        hand everything to the memoizing parent constructor.
        """
        inputs = cls.inputs if inputs is NotSpecified else inputs
        if inputs is not NotSpecified:
            # Class-level defaults may be lists; a tuple keeps the value
            # hashable.
            inputs = tuple(inputs)
        # Inputs that are still NotSpecified are an error, reported later
        # by self._validate rather than here.

        mask = cls.mask if mask is NotSpecified else mask
        if mask is NotSpecified:
            mask = AssetExists()

        if window_length is NotSpecified:
            window_length = cls.window_length

        return super(CompositeTerm, cls).__new__(
            cls,
            *args,
            inputs=inputs,
            mask=mask,
            window_length=window_length,
            **kwargs
        )

    def _init(self, inputs, window_length, mask, *args, **kwargs):
        """
        Store the three composite attributes, then continue with the
        parent's ``_init`` using the remaining arguments.
        """
        self.mask = mask
        self.window_length = window_length
        self.inputs = inputs
        parent_init = super(CompositeTerm, self)._init
        return parent_init(*args, **kwargs)

    @classmethod
    def static_identity(cls, inputs, window_length, mask, *args, **kwargs):
        """
        Extend the parent's identity with `inputs`, `window_length` and
        `mask`.
        """
        parent_identity = super(CompositeTerm, cls).static_identity(
            *args, **kwargs
        )
        return (parent_identity, inputs, window_length, mask)

    def _validate(self):
        """
        Check that this term is well-formed.  Meant to run exactly once, as
        the last step of Term._init().

        Raises TermInputsNotSpecified or WindowLengthNotSpecified when the
        corresponding attribute is missing, AssertionError when no mask was
        set, and InputTermNotAtomic when a windowed term has a non-atomic
        input.
        """
        if self.inputs is NotSpecified:
            raise TermInputsNotSpecified(termname=type(self).__name__)
        if self.window_length is NotSpecified:
            raise WindowLengthNotSpecified(termname=type(self).__name__)
        if self.mask is NotSpecified:
            # Not a user error: reaching this point means a bug in our code.
            raise AssertionError("{term} has no mask".format(term=self))

        if self.window_length:
            for term in self.inputs:
                if term.atomic:
                    continue
                raise InputTermNotAtomic(parent=self, child=term)

        return super(CompositeTerm, self)._validate()

    def _compute(self, inputs, dates, assets, mask):
        """
        Perform the actual computation; subclasses should implement this.

        Named `_compute` rather than `compute` because `compute` is reserved
        for user-supplied functions in CustomFactor.
        """
        raise NotImplementedError()

    @lazyval
    def windowed(self):
        """
        Whether this term represents a trailing window computation, i.e.
        whether `window_length` is specified and greater than zero.

        Per the original documentation (not verifiable from this module):
        a truthy value means compute_from_windows is called with
        AdjustedArray inputs, a falsey value means compute_from_baseline is
        called with np.ndarray inputs.
        """
        length = self.window_length
        if length is NotSpecified:
            return False
        return length > 0

    @lazyval
    def extra_input_rows(self):
        """
        Number of extra rows each input must supply so this term can be
        computed: ``window_length - 1``, floored at zero.
        """
        surplus = self.window_length - 1
        return max(0, surplus)

    def __repr__(self):
        template = "{type}({inputs}, window_length={window_length})"
        return template.format(
            type=type(self).__name__,
            inputs=self.inputs,
            window_length=self.window_length,
        )


1			"""
2			Base class for Filters, Factors and Classifiers
3			"""
4			from abc import ABCMeta, abstractproperty
5			from weakref import WeakValueDictionary
6
7			from numpy import bool_, full, nan
8			from six import with_metaclass
9
10			from zipline.errors import (
11			DTypeNotSpecified,
12			InputTermNotAtomic,
13			TermInputsNotSpecified,
14			WindowLengthNotPositive,
15			WindowLengthNotSpecified,
16			)
17			from zipline.utils.memoize import lazyval
18			from zipline.utils.sentinel import sentinel
19
20
21			NotSpecified = sentinel(
22			'NotSpecified',
23			'Singleton sentinel value used for Term defaults.',
24			)
25
26
27			class Term(with_metaclass(ABCMeta, object)):
28			"""
29			Base class for terms in a Pipeline API compute graph.
30			"""
31			# These are NotSpecified because a subclass is required to provide them.
32			dtype = NotSpecified
33			domain = NotSpecified
34
35			_term_cache = WeakValueDictionary()
36
37			def __new__(cls,
38			domain=NotSpecified,
39			dtype=NotSpecified,
40			*args,
41			**kwargs):
42			"""
43			Memoized constructor for Terms.
44
45			Caching previously-constructed Terms is useful because it allows us to
46			only compute equivalent sub-expressions once when traversing a Pipeline
47			dependency graph.
48
49			Caching previously-constructed Terms is **sane** because terms and
50			their inputs are both conceptually immutable.
51			"""
52			# Class-level attributes can be used to provide defaults for Term
53			# subclasses.
54
55			if domain is NotSpecified:
56			domain = cls.domain
57
58			if dtype is NotSpecified:
59			dtype = cls.dtype
60
61			identity = cls.static_identity(
62			domain=domain,
63			dtype=dtype,
64			*args, **kwargs
65			)
66
67			try:
68			return cls._term_cache[identity]
69			except KeyError:
70			new_instance = cls._term_cache[identity] = \
71			super(Term, cls).__new__(cls)._init(
72			domain=domain,
73			dtype=dtype,
74			*args, **kwargs
75			)
76			return new_instance
77
78			def __init__(self, *args, **kwargs):
79			"""
80			Noop constructor to play nicely with our caching __new__. Subclasses
81			should implement _init instead of this method.
82
83			When a class' __new__ returns an instance of that class, Python will
84			automatically call __init__ on the object, even if a new object wasn't
85			actually constructed. Because we memoize instances, we often return an
86			object that was already initialized from __new__, in which case we
87			don't want to call __init__ again.
88
89			Subclasses that need to initialize new instances should override _init,
90			which is guaranteed to be called only once.
91			"""
92			pass
93
94			def _init(self, domain, dtype):
95			self.domain = domain
96			self.dtype = dtype
97
98			self._validate()
99			return self
100
101			@classmethod
102			def static_identity(cls, domain, dtype):
103			"""
104			Return the identity of the Term that would be constructed from the
105			given arguments.
106
107			Identities that compare equal will cause us to return a cached instance
108			rather than constructing a new one. We do this primarily because it
109			makes dependency resolution easier.
110
111			This is a classmethod so that it can be called from Term.__new__ to
112			determine whether to produce a new instance.
113			"""
114			return (cls, domain, dtype)
115
116			def _validate(self):
117			"""
118			Assert that this term is well-formed. This should be called exactly
119			once, at the end of Term._init().
120			"""
121			if self.dtype is NotSpecified:
122			raise DTypeNotSpecified(termname=type(self).__name__)
123
124			@abstractproperty
125			def inputs(self):
126			"""
127			A tuple of other Terms that this Term requires for computation.
128			"""
129			raise NotImplementedError()
130
131			@abstractproperty
132			def mask(self):
133			"""
134			A 2D Filter representing asset/date pairs to include while
135			computing this Term. (True means include; False means exclude.)
136			"""
137			raise NotImplementedError()
138
139			@lazyval
140			def dependencies(self):
141			return self.inputs + (self.mask,)
142
143			@lazyval
144			def atomic(self):
145			return not any(dep for dep in self.dependencies
146			if dep is not AssetExists())
147
148
149			class AssetExists(Term):
150			"""
151			Pseudo-filter describing whether or not an asset existed on a given day.
152			This is the default mask for all terms that haven't been passed a mask
153			explicitly.
154
155			This is morally a Filter, in the sense that it produces a boolean value for
156			every asset on every date. We don't subclass Filter, however, because
157			`AssetExists` is computed directly by the PipelineEngine.
158
159			See Also
160			--------
161			zipline.assets.AssetFinder.lifetimes
162			"""
163			dtype = bool_
164			dataset = None
165			extra_input_rows = 0
166			inputs = ()
167			dependencies = ()
168			mask = None
169
170			def __repr__(self):
171			return "AssetExists()"
172
173
174			# TODO: Move mixins to a separate file?
175			class SingleInputMixin(object):
176
177			def _validate(self):
178			num_inputs = len(self.inputs)
179			if num_inputs != 1:
180			raise ValueError(
181			"{typename} expects only one input, "
182			"but received {num_inputs} instead.".format(
183			typename=type(self).__name__,
184			num_inputs=num_inputs
185			)
186			)
187			return super(SingleInputMixin, self)._validate()
188
189
190			class RequiredWindowLengthMixin(object):
191			def _validate(self):
192			if not self.windowed:
193			raise WindowLengthNotPositive(window_length=self.window_length)
194			return super(RequiredWindowLengthMixin, self)._validate()
195
196
197			class CustomTermMixin(object):
198			"""
199			Mixin for user-defined rolling-window Terms.
200
201			Implements `_compute` in terms of a user-defined `compute` function, which
202			is mapped over the input windows.
203
204			Used by CustomFactor, CustomFilter, CustomClassifier, etc.
205			"""
206
207			def __new__(cls, inputs=NotSpecified, window_length=NotSpecified):
208
209			return super(CustomTermMixin, cls).__new__(
210			cls,
211			inputs=inputs,
212			window_length=window_length,
213			)
214
215			def __init__(self, inputs=NotSpecified, window_length=NotSpecified):
216			return super(CustomTermMixin, self).__init__(
217			inputs=inputs,
218			window_length=window_length,
219			)
220
221			def compute(self, today, assets, out, *arrays):
222			"""
223			Override this method with a function that writes a value into `out`.
224			"""
225			raise NotImplementedError()
226
227			def _compute(self, windows, dates, assets, mask):
228			"""
229			Call the user's `compute` function on each window with a pre-built
230			output array.
231			"""
232			# TODO: Make mask available to user's `compute`.
233			compute = self.compute
234			out = full(mask.shape, nan, dtype=self.dtype)
235			with self.ctx:
236			# TODO: Consider pre-filtering columns that are all-nan at each
237			# time-step?
238			for idx, date in enumerate(dates):
239			compute(
240			date,
241			assets,
242			out[idx],
243			*(next(w) for w in windows)
244			)
245			out[~mask] = nan
246			return out
247
248			def short_repr(self):
249			return type(self).__name__ + '(%d)' % self.window_length
250
251
252			class CompositeTerm(Term):
253			inputs = NotSpecified
254			window_length = NotSpecified
255			mask = NotSpecified
256
257			def __new__(cls, inputs=NotSpecified, window_length=NotSpecified,
258			mask=NotSpecified, *args, **kwargs):
259
260			if inputs is NotSpecified:
261			inputs = cls.inputs
262			# Having inputs = NotSpecified is an error, but we handle it later
263			# in self._validate rather than here.
264			if inputs is not NotSpecified:
265			# Allow users to specify lists as class-level defaults, but
266			# normalize to a tuple so that inputs is hashable.
267			inputs = tuple(inputs)
268
269			if mask is NotSpecified:
270			mask = cls.mask
271			if mask is NotSpecified:
272			mask = AssetExists()
273
274			if window_length is NotSpecified:
275			window_length = cls.window_length
276
277			return super(CompositeTerm, cls).__new__(cls, inputs=inputs, mask=mask,
278			window_length=window_length,
279			*args, **kwargs)
280
281			def _init(self, inputs, window_length, mask, *args, **kwargs):
282			self.inputs = inputs
283			self.window_length = window_length
284			self.mask = mask
285			return super(CompositeTerm, self)._init(*args, **kwargs)
286
287			@classmethod
288			def static_identity(cls, inputs, window_length, mask, *args, **kwargs):
289			return (
290			super(CompositeTerm, cls).static_identity(*args, **kwargs),
291			inputs,
292			window_length,
293			mask,
294			)
295
296			def _validate(self):
297			"""
298			Assert that this term is well-formed. This should be called exactly
299			once, at the end of Term._init().
300			"""
301			if self.inputs is NotSpecified:
302			raise TermInputsNotSpecified(termname=type(self).__name__)
303			if self.window_length is NotSpecified:
304			raise WindowLengthNotSpecified(termname=type(self).__name__)
305			if self.mask is NotSpecified:
306			# This isn't user error, this is a bug in our code.
307			raise AssertionError("{term} has no mask".format(term=self))
308
309			if self.window_length:
310			for child in self.inputs:
311			if not child.atomic:
312			raise InputTermNotAtomic(parent=self, child=child)
313
314			return super(CompositeTerm, self)._validate()
315
316			def _compute(self, inputs, dates, assets, mask):
317			"""
318			Subclasses should implement this to perform actual computation.
319			This is `_compute` rather than just `compute` because `compute` is
320			reserved for user-supplied functions in CustomFactor.
321			"""
322			raise NotImplementedError()
323
324			@lazyval
325			def windowed(self):
326			"""
327			Whether or not this term represents a trailing window computation.
328
329			If term.windowed is truthy, its compute_from_windows method will be
330			called with instances of AdjustedArray as inputs.
331
332			If term.windowed is falsey, its compute_from_baseline will be called
333			with instances of np.ndarray as inputs.
334			"""
335			return (
336			self.window_length is not NotSpecified
337			and self.window_length > 0
338			)
339
340			@lazyval
341			def extra_input_rows(self):
342			"""
343			The number of extra rows needed for each of our inputs to compute this
344			term.
345			"""
346			return max(0, self.window_length - 1)
347
348			def __repr__(self):
349			return (
350			"{type}({inputs}, window_length={window_length})"
351			).format(
352			type=type(self).__name__,
353			inputs=self.inputs,
354			window_length=self.window_length,
355			)
356

quantopian / zipline

Pull Request — master (#836)

zipline.pipeline.CustomTermMixin A

Complexity

Size/Duplication

5 Methods

Duplication Side-by-Side

Filter issues like