zipline.pipeline.factors.function_application() - Code Metrics - Inspection of "Adds support for different typed adjusted arrays a..." - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#905)

unknown

created 2015-12-11 18:55 UTC

zipline.pipeline.factors.function_application() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	4
dl	0
loc	23
rs	8.7972

1 Method

Rating	Name	Duplication	Size	Complexity
A	zipline.pipeline.factors.mathfunc()	0	13	2

"""
factor.py
"""
from operator import attrgetter
from numbers import Number

from numpy import float64, inf
from toolz import curry

from zipline.errors import (
    UnknownRankMethod,
    UnsupportedDataType,
)
from zipline.lib.rank import masked_rankdata_2d
from zipline.pipeline.term import (
    CustomTermMixin,
    NotSpecified,
    RequiredWindowLengthMixin,
    SingleInputMixin,
    CompositeTerm,
)
from zipline.pipeline.expression import (
    BadBinaryOperator,
    COMPARISONS,
    is_comparison,
    MATH_BINOPS,
    method_name_for_op,
    NumericalExpression,
    NUMEXPR_MATH_FUNCS,
    UNARY_OPS,
    unary_op_name,
)
from zipline.pipeline.filters import (
    NumExprFilter,
    PercentileFilter,
)
from zipline.utils.control_flow import nullctx
from zipline.utils.numpy_utils import (
    bool_dtype,
    datetime64ns_dtype,
    float64_dtype,
)
from zipline.utils.preprocess import preprocess


# Tie-handling methods accepted by ``Rank``.  ``Rank._validate`` raises
# ``UnknownRankMethod`` for any method that is not a member of this set.
_RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal'])


def numbers_to_float64(func, argname, argvalue):
    """
    Preprocessor that coerces numeric arguments to ``numpy.float64``.

    Applied to the ``other`` argument of Factor's binary operator methods so
    that `2 + Factor()` has the same behavior as `2.0 + Factor()`.

    Parameters
    ----------
    func
        Unused.  Present because preprocessors are invoked with
        ``(func, argname, argvalue)``.
    argname
        Unused.  See ``func``.
    argvalue : object
        The value to (possibly) convert.

    Returns
    -------
    object
        ``float64(argvalue)`` when ``argvalue`` is a ``numbers.Number``;
        otherwise ``argvalue`` itself, unchanged.
    """
    return float64(argvalue) if isinstance(argvalue, Number) else argvalue


@curry
def set_attribute(name, value):
    """
    Build a decorator that stores `value` on the decorated function under
    the attribute `name`.

    The decorated function is returned as-is, so its behavior is unchanged.
    Because this function is curried, supplying only `name` yields a
    decorator factory that is still waiting for `value`.

    Usage
    -----
    >>> @set_attribute('__name__', 'foo')
    ... def bar():
    ...     return 3
    ...
    >>> bar()
    3
    >>> bar.__name__
    'foo'
    """
    def _tag(target):
        # Mutate in place and hand back the very same object.
        setattr(target, name, value)
        return target
    return _tag


# Decorators for setting the __name__ and __doc__ properties of a decorated
# function.
# Example:
#     @with_doc("Docstring for foo.")
#     @with_name('foo')
#     def bar():
#         ...
with_name = set_attribute('__name__')
with_doc = set_attribute('__doc__')


def binop_return_type(op):
    """
    Choose the term class produced by applying binary operator `op`.

    Parameters
    ----------
    op : str
        Operator symbol, (e.g. '+', '<', ...).

    Returns
    -------
    type
        ``NumExprFilter`` when `op` is a comparison, ``NumExprFactor``
        otherwise.
    """
    return NumExprFilter if is_comparison(op) else NumExprFactor


def binop_return_dtype(op, left, right):
    """
    Determine the dtype produced by `left <op> right`, rejecting operand
    dtype combinations that are not supported.

    Parameters
    ----------
    op : str
        Operator symbol, (e.g. '+', '-', ...).
    left : numpy.dtype
        Dtype of left hand side.
    right : numpy.dtype
        Dtype of right hand side.

    Returns
    -------
    outdtype : numpy.dtype
        ``bool_dtype`` for comparisons, ``float64_dtype`` for every other
        operator.

    Raises
    ------
    TypeError
        If `op` is a comparison and the operand dtypes differ, or if `op` is
        not a comparison and either operand is not ``float64_dtype``.
    """
    if is_comparison(op):
        # Comparisons only require that both sides share a dtype.
        if left != right:
            message = (
                "Don't know how to compute {left} {op} {right}.\n"
                "Comparisons are only supported between Factors of equal "
                "dtypes."
            ).format(left=left, op=op, right=right)
            raise TypeError(message)
        return bool_dtype

    # Everything else is arithmetic, which is float64-only.
    if left != float64_dtype or right != float64_dtype:
        message = (
            "Don't know how to compute {left} {op} {right}.\n"
            "Arithmetic operators are only supported on Factors of "
            "dtype 'float64'."
        ).format(left=left.name, op=op, right=right.name)
        raise TypeError(message)
    return float64_dtype


def binary_operator(op):
    """
    Build the method implementing binary operator `op` on a Factor subclass.

    Parameters
    ----------
    op : str
        Operator symbol, (e.g. '+', '-', '<', ...).

    Returns
    -------
    callable
        A method taking ``(self, other)``, suitable for implementing
        functions like __add__.  Its ``__name__`` is
        ``method_name_for_op(op)``.  Numeric `other` values are converted to
        float64 before the method body runs.

    Notes
    -----
    The returned method raises ``BadBinaryOperator`` when `other` is neither
    a Factor nor a number.
    """
    # Looks up the commuted implementation of `op` on an object.  Used to
    # hand the operation over to a NumExprFactor on the right-hand side.
    get_commuted_method = attrgetter(method_name_for_op(op, commute=True))

    @preprocess(other=numbers_to_float64)
    @with_doc("Binary Operator: '%s'" % op)
    @with_name(method_name_for_op(op))
    def binary_operator(self, other):
        # Resolved per call rather than once in the enclosing scope: the
        # classes returned by binop_return_type aren't defined yet when this
        # factory is invoked in the class body of Factor.
        result_type = binop_return_type(op)

        if isinstance(self, NumExprFactor):
            # `self` already wraps an expression; splice `other` into it.
            self_expr, other_expr, new_inputs = self.build_binary_op(
                op, other,
            )
            combined = "({left}) {op} ({right})".format(
                left=self_expr,
                op=op,
                right=other_expr,
            )
            return result_type(
                combined,
                new_inputs,
                dtype=binop_return_dtype(op, self.dtype, other.dtype),
            )

        if isinstance(other, NumExprFactor):
            # NumericalExpression overrides ops to correctly handle merging
            # of inputs, so defer to the commuted operator on `other` with
            # ourself as the argument.
            return get_commuted_method(other)(self)

        if isinstance(other, Factor):
            # An operand combined with itself is bound only once.
            if self is other:
                template, inputs = "x_0 {op} x_0", (self,)
            else:
                template, inputs = "x_0 {op} x_1", (self, other)
            return result_type(
                template.format(op=op),
                inputs,
                dtype=binop_return_dtype(op, self.dtype, other.dtype),
            )

        if isinstance(other, Number):
            # The preprocessor has already turned numeric literals into
            # float64, so `other.dtype` is available here.
            return result_type(
                "x_0 {op} ({constant})".format(op=op, constant=other),
                binds=(self,),
                dtype=binop_return_dtype(op, self.dtype, other.dtype)
            )

        raise BadBinaryOperator(op, self, other)

    return binary_operator


def reflected_binary_operator(op):
    """
    Build the reflected (right-hand) method for binary operator `op` on a
    Factor.

    Parameters
    ----------
    op : str
        Operator symbol.  Must not be a comparison.

    Returns
    -------
    callable
        A method taking ``(self, other)`` and computing ``other <op> self``,
        suitable for implementing functions like __radd__.  Its ``__name__``
        is ``method_name_for_op(op, commute=True)``.
    """
    assert not is_comparison(op)

    @preprocess(other=numbers_to_float64)
    @with_name(method_name_for_op(op, commute=True))
    def reflected_binary_operator(self, other):

        if isinstance(self, NumericalExpression):
            self_expr, other_expr, new_inputs = self.build_binary_op(
                op, other
            )
            # Reflected: `other` is the left operand, `self` the right.
            swapped = "({left}) {op} ({right})".format(
                left=other_expr,
                right=self_expr,
                op=op,
            )
            return NumExprFactor(
                swapped,
                new_inputs,
                dtype=binop_return_dtype(op, other.dtype, self.dtype)
            )

        # Only the numeric case needs handling here: in all other valid
        # cases the corresponding left-binding method will be called.
        if isinstance(other, Number):
            return NumExprFactor(
                "{constant} {op} x_0".format(op=op, constant=other),
                binds=(self,),
                dtype=binop_return_dtype(op, other.dtype, self.dtype),
            )

        raise BadBinaryOperator(op, other, self)
    return reflected_binary_operator


def unary_operator(op):
    """
    Build the method implementing unary operator `op` for Factors.

    Parameters
    ----------
    op : str
        Operator symbol.  Only '-' (negation) is currently supported.

    Returns
    -------
    callable
        A method taking ``self`` and returning a float64 ``NumExprFactor``.
        It raises ``TypeError`` when ``self.dtype`` is not float64.

    Raises
    ------
    ValueError
        If `op` is not a supported unary operator.
    """
    supported = {'-'}
    if op not in supported:
        raise ValueError("Invalid unary operator %s." % op)

    @with_doc("Unary Operator: '%s'" % op)
    @with_name(unary_op_name(op))
    def unary_operator(self):
        if self.dtype != float64_dtype:
            message = (
                "Can't apply unary operator {op!r} to instance of "
                "{typename!r} with dtype {dtypename!r}.\n"
                "{op!r} is only supported for Factors of dtype "
                "'float64'."
            ).format(
                op=op,
                typename=type(self).__name__,
                dtypename=self.dtype.name,
            )
            raise TypeError(message)

        if isinstance(self, NumericalExpression):
            # Wrap the existing expression and keep its inputs.
            expr = "{op}({expr})".format(op=op, expr=self._expr)
            inputs = self.inputs
        else:
            # Plain term: bind it as the only input, x_0.
            expr = "{op}x_0".format(op=op)
            inputs = (self,)

        # NumExprFactor is looked up at call time; it is defined further
        # down the module than the place where this factory is invoked.
        return NumExprFactor(expr, inputs, dtype=float64_dtype)
    return unary_operator


def function_application(func):
    """
    Build a method that applies the numexpr math function `func` to a
    Factor.

    Parameters
    ----------
    func : str
        Name of the function.  Must be a member of ``NUMEXPR_MATH_FUNCS``.

    Returns
    -------
    callable
        A method taking ``self`` and returning a float64 ``NumExprFactor``.
        Its ``__name__`` is `func`.

    Raises
    ------
    ValueError
        If `func` is not in ``NUMEXPR_MATH_FUNCS``.
    """
    if func not in NUMEXPR_MATH_FUNCS:
        raise ValueError("Unsupported mathematical function '%s'" % func)

    @with_name(func)
    def mathfunc(self):
        if isinstance(self, NumericalExpression):
            # Wrap the existing expression and keep its inputs.
            expr = "{func}({expr})".format(func=func, expr=self._expr)
            inputs = self.inputs
        else:
            # Plain term: bind it as the only input, x_0.
            expr = "{func}(x_0)".format(func=func)
            inputs = (self,)
        return NumExprFactor(expr, inputs, dtype=float64_dtype)
    return mathfunc


# Dtypes a Factor is allowed to have.  ``Factor._validate`` raises
# ``UnsupportedDataType`` for any dtype outside this set.
FACTOR_DTYPES = frozenset([datetime64ns_dtype, float64_dtype])


class Factor(CompositeTerm):
    """
    Pipeline API expression producing numerically-valued outputs.

    Operator methods are generated in the class body below.  Math operators
    build ``NumExprFactor`` instances and comparisons build ``NumExprFilter``
    instances.  ``==`` is exposed as the ``eq`` method rather than as
    ``__eq__``.

    The dtype of a Factor must be a member of ``FACTOR_DTYPES``; this is
    enforced by ``_validate``.
    """
    # Dynamically add functions for creating NumExprFactor/NumExprFilter
    # instances.
    # Entries written into ``clsdict`` (the class-body namespace) become
    # attributes of the class.
    clsdict = locals()
    clsdict.update(
        {
            method_name_for_op(op): binary_operator(op)
            # Don't override __eq__ because it breaks comparisons on tuples of
            # Factors.
            for op in MATH_BINOPS.union(COMPARISONS - {'=='})
        }
    )
    # Reflected variants (e.g. __radd__).  Math operators only: the factory
    # asserts that ``op`` is not a comparison.
    clsdict.update(
        {
            method_name_for_op(op, commute=True): reflected_binary_operator(op)
            for op in MATH_BINOPS
        }
    )
    clsdict.update(
        {
            unary_op_name(op): unary_operator(op)
            for op in UNARY_OPS
        }
    )

    # One method per supported numexpr math function, named after the
    # function itself.
    clsdict.update(
        {
            funcname: function_application(funcname)
            for funcname in NUMEXPR_MATH_FUNCS
        }
    )

    # Python 3 dispatches `/` to __truediv__/__rtruediv__, so alias them to
    # the generated __div__/__rdiv__ implementations.
    __truediv__ = clsdict['__div__']
    __rtruediv__ = clsdict['__rdiv__']

    # Named-method spelling of `==`, provided because __eq__ is deliberately
    # not overridden above.
    eq = binary_operator('==')

    def _validate(self):
        """
        Run superclass validation, then check that ``self.dtype`` is one of
        ``FACTOR_DTYPES``.

        Returns
        -------
        The return value of the superclass ``_validate``.

        Raises
        ------
        UnsupportedDataType
            If ``self.dtype`` is not in ``FACTOR_DTYPES``.
        """
        # Do superclass validation first so that `NotSpecified` dtypes get
        # handled.
        retval = super(Factor, self)._validate()
        if self.dtype not in FACTOR_DTYPES:
            raise UnsupportedDataType(
                typename=type(self).__name__,
                dtype=self.dtype
            )
        return retval

    def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
        """
        Construct a new Factor representing the sorted rank of each column
        within each row.

        Parameters
        ----------
        method : str, {'ordinal', 'min', 'max', 'dense', 'average'}
            The method used to assign ranks to tied elements. See
            `scipy.stats.rankdata` for a full description of the semantics for
            each ranking method. Default is 'ordinal'.
        ascending : bool, optional
            Whether to return sorted rank in ascending or descending order.
            Default is True.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, ranks are computed ignoring any asset/date
            pairs for which `mask` produces a value of False.

        Returns
        -------
        ranks : zipline.pipeline.factors.Rank
            A new factor that will compute the ranking of the data produced by
            `self`.

        Notes
        -----
        The default value for `method` is different from the default for
        `scipy.stats.rankdata`.  See that function's documentation for a full
        description of the valid inputs to `method`.

        Missing or non-existent data on a given day will cause an asset to be
        given a rank of NaN for that day.

        See Also
        --------
        scipy.stats.rankdata
        zipline.lib.rank
        zipline.pipeline.factors.Rank
        """
        return Rank(self, method=method, ascending=ascending, mask=mask)

    def top(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the top N asset values of self each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, top values are computed ignoring any
            asset/date pairs for which `mask` produces a value of False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        # Rank in descending order and keep ranks no greater than N.
        return self.rank(ascending=False, mask=mask) <= N

    def bottom(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the bottom N asset values of self each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, bottom values are computed ignoring any
            asset/date pairs for which `mask` produces a value of False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        # Rank in ascending order and keep ranks no greater than N.
        return self.rank(ascending=True, mask=mask) <= N

    def percentile_between(self,
                           min_percentile,
                           max_percentile,
                           mask=NotSpecified):
        """
        Construct a new Filter representing entries from the output of this
        Factor that fall within the percentile range defined by min_percentile
        and max_percentile.

        Parameters
        ----------
        min_percentile : float [0.0, 100.0]
            Return True for assets falling above this percentile in the data.
        max_percentile : float [0.0, 100.0]
            Return True for assets falling below this percentile in the data.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing percentile
            thresholds.  If mask is supplied, percentile cutoffs are computed
            each day using only assets for which `mask` returns True, and
            assets not passing `mask` will produce False in the output of this
            filter as well.

        Returns
        -------
        out : zipline.pipeline.filters.PercentileFilter
            A new filter that will compute the specified percentile-range mask.

        See Also
        --------
        zipline.pipeline.filters.PercentileFilter
        """
        return PercentileFilter(
            self,
            min_percentile=min_percentile,
            max_percentile=max_percentile,
            mask=mask,
        )

    def isnan(self):
        """
        A Filter producing True for all values where this Factor is NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        # NaN is the only floating point value that compares unequal to
        # itself.
        return self != self

    def notnan(self):
        """
        A Filter producing True for values where this Factor is not NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        return ~self.isnan()

    def isfinite(self):
        """
        A Filter producing True for values where this Factor is anything but
        NaN, inf, or -inf.

        Returns
        -------
        finitefilter : zipline.pipeline.filters.Filter
        """
        # Both strict comparisons are False for NaN, so NaN is excluded along
        # with the infinities.
        return (-inf < self) & (self < inf)


class NumExprFactor(NumericalExpression, Factor):
    """
    A Factor whose values come from evaluating a numexpr expression.

    Parameters
    ----------
    expr : string
        An expression string suitable for passing to numexpr.  Every variable
        in `expr` should have the form "x_i", where i is the index of the
        corresponding factor input in `binds`.
    binds : tuple
        A tuple of factors to use as inputs.
    dtype : numpy.dtype
        Dtype of the result.  The operator factories in this module always
        pass it by keyword.

    Notes
    -----
    Numerical operators such as `+` and `-` construct NumExprFactors on the
    user's behalf, so users should rarely need to construct one directly.
    """


class Rank(SingleInputMixin, Factor):
    """
    A Factor representing the row-wise rank data of another Factor.

    Parameters
    ----------
    factor : zipline.pipeline.factors.Factor
        The factor on which to compute ranks.
    method : str, {'average', 'min', 'max', 'dense', 'ordinal'}
        The method used to assign ranks to tied elements.  See
        `scipy.stats.rankdata` for a full description of the semantics for each
        ranking method.
    ascending : bool
        Whether to compute ranks in ascending or descending order.
    mask : zipline.pipeline.Filter
        A Filter representing assets to consider when computing ranks.

    See Also
    --------
    scipy.stats.rankdata : Underlying ranking algorithm.
    zipline.pipeline.factors.Factor.rank : Method-style interface to same
        functionality.

    Notes
    -----
    Most users should call Factor.rank rather than directly construct an
    instance of this class.
    """
    window_length = 0
    dtype = float64_dtype

    def __new__(cls, factor, method, ascending, mask):
        # The ranked factor is this term's only input.
        single_input = (factor,)
        return super(Rank, cls).__new__(
            cls,
            inputs=single_input,
            method=method,
            ascending=ascending,
            mask=mask,
        )

    def _init(self, method, ascending, *args, **kwargs):
        # Record the rank-specific options, then pass everything else on to
        # the superclass.
        self._method, self._ascending = method, ascending
        return super(Rank, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, method, ascending, *args, **kwargs):
        # Extend the superclass identity with the rank-specific options.
        base_identity = super(Rank, cls).static_identity(*args, **kwargs)
        return (base_identity, method, ascending)

    def _validate(self):
        """
        Verify that the stored rank method is valid, then run superclass
        validation.
        """
        if self._method in _RANK_METHODS:
            return super(Rank, self)._validate()
        raise UnknownRankMethod(
            method=self._method,
            choices=set(_RANK_METHODS),
        )

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a like-shaped array of per-row
        ranks.
        """
        data = arrays[0]
        missing_value = self.inputs[0].missing_value
        return masked_rankdata_2d(
            data,
            mask,
            missing_value,
            self._method,
            self._ascending,
        )

    def __repr__(self):
        template = "{type}({input_}, method='{method}', mask={mask})"
        return template.format(
            type=type(self).__name__,
            input_=self.inputs[0],
            method=self._method,
            mask=self.mask,
        )


class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    '''
    Base class for user-defined Factors.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `window_length`.

    Notes
    -----
    Users implementing their own Factors should subclass CustomFactor and
    implement a method named `compute` with the following signature:

    .. code-block:: python

        def compute(self, today, assets, out, *inputs):
           ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFactor constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[self.dtype, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    ``compute`` functions should expect to be passed NaN values for dates on
    which no data was available for an asset.  This may include dates on which
    an asset did not yet exist.

    For example, if a CustomFactor requires 10 rows of close price data, and
    asset A started trading on Monday June 2nd, 2014, then on Tuesday, June
    3rd, 2014, the column of input data for asset A will have 9 leading NaNs
    for the preceding days on which data was not yet available.

    Examples
    --------

    A CustomFactor with pre-declared defaults:

    .. code-block:: python

        class TenDayRange(CustomFactor):
            """
            Computes the difference between the highest high in the last 10
            days and the lowest low.

            Pre-declares high and low as default inputs and `window_length` as
            10.
            """

            inputs = [USEquityPricing.high, USEquityPricing.low]
            window_length = 10

            def compute(self, today, assets, out, highs, lows):
                from numpy import nanmin, nanmax

                highest_highs = nanmax(highs, axis=0)
                lowest_lows = nanmin(lows, axis=0)
                out[:] = highest_highs - lowest_lows


        # Doesn't require passing inputs or window_length because they're
        # pre-declared as defaults for the TenDayRange class.
        ten_day_range = TenDayRange()

    A CustomFactor without defaults:

    .. code-block:: python

        class MedianValue(CustomFactor):
            """
            Computes the median value of an arbitrary single input over an
            arbitrary window.

            Does not declare any defaults, so values for `window_length` and
            `inputs` must be passed explicitly on every construction.
            """

            def compute(self, today, assets, out, data):
                from numpy import nanmedian
                out[:] = nanmedian(data, axis=0)

        # Values for `inputs` and `window_length` must be passed explicitly to
        # MedianValue.
        median_close10 = MedianValue([USEquityPricing.close], window_length=10)
        median_low15 = MedianValue([USEquityPricing.low], window_length=15)
    '''
    # CustomFactors produce float64 output unless a subclass overrides this.
    dtype = float64_dtype
    # NOTE(review): presumably a no-op context manager used around
    # `compute` calls -- confirm against CustomTermMixin.
    ctx = nullctx()


1			"""
2			factor.py
3			"""
4			from operator import attrgetter
5			from numbers import Number
6
7			from numpy import float64, inf
8			from toolz import curry
9
10			from zipline.errors import (
11			UnknownRankMethod,
12			UnsupportedDataType,
13			)
14			from zipline.lib.rank import masked_rankdata_2d
15			from zipline.pipeline.term import (
16			CustomTermMixin,
17			NotSpecified,
18			RequiredWindowLengthMixin,
19			SingleInputMixin,
20			CompositeTerm,
21			)
22			from zipline.pipeline.expression import (
23			BadBinaryOperator,
24			COMPARISONS,
25			is_comparison,
26			MATH_BINOPS,
27			method_name_for_op,
28			NumericalExpression,
29			NUMEXPR_MATH_FUNCS,
30			UNARY_OPS,
31			unary_op_name,
32			)
33			from zipline.pipeline.filters import (
34			NumExprFilter,
35			PercentileFilter,
36			)
37			from zipline.utils.control_flow import nullctx
38			from zipline.utils.numpy_utils import (
39			bool_dtype,
40			datetime64ns_dtype,
41			float64_dtype,
42			)
43			from zipline.utils.preprocess import preprocess
44
45
46			_RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal'])
47
48
49			def numbers_to_float64(func, argname, argvalue):
50			"""
51			Preprocessor for converting numerical inputs into floats.
52
53			This is used in the binary operator constructors for Factor so that
54			`2 + Factor()` has the same behavior as `2.0 + Factor()`.
55			"""
56			if isinstance(argvalue, Number):
57			return float64(argvalue)
58			return argvalue
59
60
61			@curry
62			def set_attribute(name, value):
63			"""
64			Decorator factory for setting attributes on a function.
65
66			Doesn't change the behavior of the wrapped function.
67
68			Usage
69			-----
70			>>> @set_attribute('__name__', 'foo')
71			... def bar():
72			... return 3
73			...
74			>>> bar()
75			3
76			>>> bar.__name__
77			'foo'
78			"""
79			def decorator(f):
80			setattr(f, name, value)
81			return f
82			return decorator
83
84
85			# Decorators for setting the __name__ and __doc__ properties of a decorated
86			# function.
87			# Example:
88			with_name = set_attribute('__name__')
89			with_doc = set_attribute('__doc__')
90
91
92			def binop_return_type(op):
93			if is_comparison(op):
94			return NumExprFilter
95			else:
96			return NumExprFactor
97
98
99			def binop_return_dtype(op, left, right):
100			"""
101			Compute the expected return dtype for the given binary operator.
102
103			Parameters
104			----------
105			op : str
106			Operator symbol, (e.g. '+', '-', ...).
107			left : numpy.dtype
108			Dtype of left hand side.
109			right : numpy.dtype
110			Dtype of right hand side.
111
112			Returns
113			-------
114			outdtype : numpy.dtype
115			The dtype of the result of `left <op> right`.
116			"""
117			if is_comparison(op):
118			if left != right:
119			raise TypeError(
120			"Don't know how to compute {left} {op} {right}.\n"
121			"Comparisons are only supported between Factors of equal "
122			"dtypes.".format(left=left, op=op, right=right)
123			)
124			return bool_dtype
125
126			elif left != float64_dtype or right != float64_dtype:
127			raise TypeError(
128			"Don't know how to compute {left} {op} {right}.\n"
129			"Arithmetic operators are only supported on Factors of "
130			"dtype 'float64'.".format(
131			left=left.name,
132			op=op,
133			right=right.name,
134			)
135			)
136			return float64_dtype
137
138
139			def binary_operator(op):
140			"""
141			Factory function for making binary operator methods on a Factor subclass.
142
143			Returns a function, "binary_operator" suitable for implementing functions
144			like __add__.
145			"""
146			# When combining a Factor with a NumericalExpression, we use this
147			# attrgetter instance to defer to the commuted implementation of the
148			# NumericalExpression operator.
149			commuted_method_getter = attrgetter(method_name_for_op(op, commute=True))
150
151			@preprocess(other=numbers_to_float64)
152			@with_doc("Binary Operator: '%s'" % op)
153			@with_name(method_name_for_op(op))
154			def binary_operator(self, other):
155			# This can't be hoisted up a scope because the types returned by
156			# binop_return_type aren't defined when the top-level function is
157			# invoked in the class body of Factor.
158			return_type = binop_return_type(op)
159			if isinstance(self, NumExprFactor):
160			self_expr, other_expr, new_inputs = self.build_binary_op(
161			op, other,
162			)
163			return return_type(
164			"({left}) {op} ({right})".format(
165			left=self_expr,
166			op=op,
167			right=other_expr,
168			),
169			new_inputs,
170			dtype=binop_return_dtype(op, self.dtype, other.dtype),
171			)
172			elif isinstance(other, NumExprFactor):
173			# NumericalExpression overrides ops to correctly handle merging of
174			# inputs. Look up and call the appropriate reflected operator with
175			# ourself as the input.
176			return commuted_method_getter(other)(self)
177			elif isinstance(other, Factor):
178			if self is other:
179			return return_type(
180			"x_0 {op} x_0".format(op=op),
181			(self,),
182			dtype=binop_return_dtype(op, self.dtype, other.dtype),
183			)
184			return return_type(
185			"x_0 {op} x_1".format(op=op),
186			(self, other),
187			dtype=binop_return_dtype(op, self.dtype, other.dtype),
188			)
189			elif isinstance(other, Number):
190			return return_type(
191			"x_0 {op} ({constant})".format(op=op, constant=other),
192			binds=(self,),
193			# Interpret numeric literals as floats.
194			dtype=binop_return_dtype(op, self.dtype, other.dtype)
195			)
196			raise BadBinaryOperator(op, self, other)
197
198			return binary_operator
199
200
201			def reflected_binary_operator(op):
202			"""
203			Factory function for making binary operator methods on a Factor.
204
205			Returns a function, "reflected_binary_operator" suitable for implementing
206			functions like __radd__.
207			"""
208			assert not is_comparison(op)
209
210			@preprocess(other=numbers_to_float64)
211			@with_name(method_name_for_op(op, commute=True))
212			def reflected_binary_operator(self, other):
213
214			if isinstance(self, NumericalExpression):
215			self_expr, other_expr, new_inputs = self.build_binary_op(
216			op, other
217			)
218			return NumExprFactor(
219			"({left}) {op} ({right})".format(
220			left=other_expr,
221			right=self_expr,
222			op=op,
223			),
224			new_inputs,
225			dtype=binop_return_dtype(op, other.dtype, self.dtype)
226			)
227
228			# Only have to handle the numeric case because in all other valid cases
229			# the corresponding left-binding method will be called.
230			elif isinstance(other, Number):
231			return NumExprFactor(
232			"{constant} {op} x_0".format(op=op, constant=other),
233			binds=(self,),
234			dtype=binop_return_dtype(op, other.dtype, self.dtype),
235			)
236			raise BadBinaryOperator(op, other, self)
237			return reflected_binary_operator
238
239
240			def unary_operator(op):
241			"""
242			Factory function for making unary operator methods for Factors.
243			"""
244			# Only negate is currently supported.
245			valid_ops = {'-'}
246			if op not in valid_ops:
247			raise ValueError("Invalid unary operator %s." % op)
248
249			@with_doc("Unary Operator: '%s'" % op)
250			@with_name(unary_op_name(op))
251			def unary_operator(self):
252			if self.dtype != float64_dtype:
253			raise TypeError(
254			"Can't apply unary operator {op!r} to instance of "
255			"{typename!r} with dtype {dtypename!r}.\n"
256			"{op!r} is only supported for Factors of dtype "
257			"'float64'.".format(
258			op=op,
259			typename=type(self).__name__,
260			dtypename=self.dtype.name,
261			)
262			)
263
264			# This can't be hoisted up a scope because the types returned by
265			# unary_op_return_type aren't defined when the top-level function is
266			# invoked.
267			if isinstance(self, NumericalExpression):
268			return NumExprFactor(
269			"{op}({expr})".format(op=op, expr=self._expr),
270			self.inputs,
271			dtype=float64_dtype,
272			)
273			else:
274			return NumExprFactor(
275			"{op}x_0".format(op=op),
276			(self,),
277			dtype=float64_dtype,
278			)
279			return unary_operator
280
281
def function_application(func):
    """
    Build the Factor method that applies the numexpr math function ``func``.

    Parameters
    ----------
    func : str
        Name of the function to apply.  Must be a member of
        NUMEXPR_MATH_FUNCS.

    Returns
    -------
    mathfunc : callable
        A method taking only ``self`` and returning a float64 NumExprFactor
        that applies ``func`` to ``self``.

    Raises
    ------
    ValueError
        If ``func`` is not a member of NUMEXPR_MATH_FUNCS.
    """
    if func not in NUMEXPR_MATH_FUNCS:
        raise ValueError("Unsupported mathematical function '%s'" % func)

    @with_name(func)
    def mathfunc(self):
        if isinstance(self, NumericalExpression):
            # Wrap the existing expression and keep its inputs, rather than
            # nesting one NumExprFactor inside another.
            expr = "{func}({expr})".format(func=func, expr=self._expr)
            binds = self.inputs
        else:
            # Any other Factor becomes the single input, bound as x_0.
            expr = "{func}(x_0)".format(func=func)
            binds = (self,)
        return NumExprFactor(expr, binds, dtype=float64_dtype)

    return mathfunc
305
306
# The dtypes a Factor is allowed to produce.  Factor._validate raises
# UnsupportedDataType for any dtype that is not a member of this set.
FACTOR_DTYPES = frozenset([datetime64ns_dtype, float64_dtype])
308
309
class Factor(CompositeTerm):
    """
    Pipeline API expression producing numerically-valued outputs.

    Operator methods, their reflected variants, unary operators, and the
    numexpr math functions are generated by the module-level factory
    functions and installed while the class body executes.
    """
    # Dynamically add functions for creating NumExprFactor/NumExprFilter
    # instances.
    # NOTE: inside a class body, `locals()` is the namespace the class is
    # being built from, so every entry added below ends up as an attribute of
    # Factor.
    clsdict = locals()
    clsdict.update(
        {
            method_name_for_op(op): binary_operator(op)
            # Don't override __eq__ because it breaks comparisons on tuples of
            # Factors.
            for op in MATH_BINOPS.union(COMPARISONS - {'=='})
        }
    )
    # Reflected operators exist only for the math operators, not for
    # comparisons.
    clsdict.update(
        {
            method_name_for_op(op, commute=True): reflected_binary_operator(op)
            for op in MATH_BINOPS
        }
    )
    clsdict.update(
        {
            unary_op_name(op): unary_operator(op)
            for op in UNARY_OPS
        }
    )

    # One method per numexpr math function, named after the function itself.
    clsdict.update(
        {
            funcname: function_application(funcname)
            for funcname in NUMEXPR_MATH_FUNCS
        }
    )

    # True division reuses the generated division implementations.
    __truediv__ = clsdict['__div__']
    __rtruediv__ = clsdict['__rdiv__']

    # Equality is exposed as a named method because `__eq__` is deliberately
    # left alone (see the comment on the binary operators above).
    eq = binary_operator('==')

    def _validate(self):
        """
        Run superclass validation, then check that our dtype is supported.

        Raises
        ------
        UnsupportedDataType
            If ``self.dtype`` is not a member of FACTOR_DTYPES.
        """
        # Do superclass validation first so that `NotSpecified` dtypes get
        # handled.
        retval = super(Factor, self)._validate()
        if self.dtype not in FACTOR_DTYPES:
            raise UnsupportedDataType(
                typename=type(self).__name__,
                dtype=self.dtype
            )
        return retval

    def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
        """
        Construct a new Factor representing the sorted rank of each column
        within each row.

        Parameters
        ----------
        method : str, {'ordinal', 'min', 'max', 'dense', 'average'}
            The method used to assign ranks to tied elements. See
            `scipy.stats.rankdata` for a full description of the semantics for
            each ranking method. Default is 'ordinal'.
        ascending : bool, optional
            Whether to return sorted rank in ascending or descending order.
            Default is True.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, ranks are computed ignoring any asset/date
            pairs for which `mask` produces a value of False.

        Returns
        -------
        ranks : zipline.pipeline.factors.Rank
            A new factor that will compute the ranking of the data produced by
            `self`.

        Notes
        -----
        The default value for `method` is different from the default for
        `scipy.stats.rankdata`. See that function's documentation for a full
        description of the valid inputs to `method`.

        Missing or non-existent data on a given day will cause an asset to be
        given a rank of NaN for that day.

        See Also
        --------
        scipy.stats.rankdata
        zipline.lib.rank
        zipline.pipeline.factors.Rank
        """
        return Rank(self, method=method, ascending=ascending, mask=mask)

    def top(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the top N asset values of self each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, top values are computed ignoring any
            asset/date pairs for which `mask` produces a value of False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        # Rank in descending order so the largest values receive the lowest
        # ranks.  NOTE(review): `<= N` selecting exactly N assets assumes
        # ranks start at 1 -- confirm against zipline.lib.rank.
        return self.rank(ascending=False, mask=mask) <= N

    def bottom(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the bottom N asset values of self each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, bottom values are computed ignoring any
            asset/date pairs for which `mask` produces a value of False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        # Rank in ascending order so the smallest values receive the lowest
        # ranks.  NOTE(review): `<= N` selecting exactly N assets assumes
        # ranks start at 1 -- confirm against zipline.lib.rank.
        return self.rank(ascending=True, mask=mask) <= N

    def percentile_between(self,
                           min_percentile,
                           max_percentile,
                           mask=NotSpecified):
        """
        Construct a new Filter representing entries from the output of this
        Factor that fall within the percentile range defined by min_percentile
        and max_percentile.

        Parameters
        ----------
        min_percentile : float [0.0, 100.0]
            Return True for assets falling above this percentile in the data.
        max_percentile : float [0.0, 100.0]
            Return True for assets falling below this percentile in the data.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing percentile
            thresholds. If mask is supplied, percentile cutoffs are computed
            each day using only assets for which `mask` returns True, and
            assets not passing `mask` will produce False in the output of this
            filter as well.

        Returns
        -------
        out : zipline.pipeline.filters.PercentileFilter
            A new filter that will compute the specified percentile-range mask.

        See Also
        --------
        zipline.pipeline.filters.PercentileFilter
        """
        return PercentileFilter(
            self,
            min_percentile=min_percentile,
            max_percentile=max_percentile,
            mask=mask,
        )

    def isnan(self):
        """
        A Filter producing True for all values where this Factor is NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        # NaN is the only floating point value that compares unequal to
        # itself.
        return self != self

    def notnan(self):
        """
        A Filter producing True for values where this Factor is not NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        return ~self.isnan()

    def isfinite(self):
        """
        A Filter producing True for values where this Factor is anything but
        NaN, inf, or -inf.
        """
        # Both comparisons are strict, which excludes the infinities; NaN
        # fails every ordered comparison, so it is excluded as well.
        return (-inf < self) & (self < inf)
505
506
class NumExprFactor(NumericalExpression, Factor):
    """
    A Factor whose values come from evaluating a numexpr expression.

    Parameters
    ----------
    expr : string
        The expression to hand to numexpr.  Every variable appearing in it
        must be named "x_i", where i is the position of the matching input
        factor in 'binds'.
    binds : tuple
        The factors supplying the inputs of the expression.

    Notes
    -----
    Instances are normally produced by numerical operators such as `+` and
    `-`; constructing a NumExprFactor by hand should rarely be necessary.
    """
526
527
class Rank(SingleInputMixin, Factor):
    """
    A Factor representing the row-wise rank data of another Factor.

    Parameters
    ----------
    factor : zipline.pipeline.factors.Factor
        The factor on which to compute ranks.
    method : str, {'average', 'min', 'max', 'dense', 'ordinal'}
        The method used to assign ranks to tied elements. See
        `scipy.stats.rankdata` for a full description of the semantics for each
        ranking method.
    ascending : bool
        Whether to compute ranks in ascending or descending order.
    mask : zipline.pipeline.Filter
        A Filter representing assets to consider when computing ranks.
        `Factor.rank` passes `NotSpecified` when the caller supplies no mask.

    See Also
    --------
    scipy.stats.rankdata : Underlying ranking algorithm.
    zipline.pipeline.factors.Factor.rank : Method-style interface to same
        functionality.

    Notes
    -----
    Most users should call Factor.rank rather than directly construct an
    instance of this class.
    """
    window_length = 0
    dtype = float64_dtype

    def __new__(cls, factor, method, ascending, mask):
        # Everything is forwarded by keyword; `method` and `ascending` are
        # consumed again by `_init` and `static_identity` below.
        return super(Rank, cls).__new__(
            cls,
            inputs=(factor,),
            method=method,
            ascending=ascending,
            mask=mask,
        )

    def _init(self, method, ascending, *args, **kwargs):
        """
        Store the Rank-specific parameters and delegate the rest.

        `*args`/`**kwargs` must be true variadic parameters: `__new__`
        forwards `inputs` and `mask` by keyword, and they have to reach the
        superclass untouched.
        """
        self._method = method
        self._ascending = ascending
        return super(Rank, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, method, ascending, *args, **kwargs):
        """
        Extend the superclass identity with `method` and `ascending`, so that
        ranks differing only in those parameters get distinct identities.
        """
        return (
            super(Rank, cls).static_identity(*args, **kwargs),
            method,
            ascending,
        )

    def _validate(self):
        """
        Verify that the stored rank method is valid.

        Raises
        ------
        UnknownRankMethod
            If the stored method is not a member of _RANK_METHODS.
        """
        if self._method not in _RANK_METHODS:
            raise UnknownRankMethod(
                method=self._method,
                choices=set(_RANK_METHODS),
            )
        return super(Rank, self)._validate()

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a like-shaped array of per-row
        ranks.
        """
        # Only the single input array is ranked; `dates` and `assets` are
        # unused here.
        return masked_rankdata_2d(
            arrays[0],
            mask,
            self.inputs[0].missing_value,
            self._method,
            self._ascending,
        )

    def __repr__(self):
        return "{type}({input_}, method='{method}', mask={mask})".format(
            type=type(self).__name__,
            input_=self.inputs[0],
            method=self._method,
            mask=self.mask,
        )
607
608
class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    '''
    Base class for user-defined Factors.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `window_length`.

    Notes
    -----
    Users implementing their own Factors should subclass CustomFactor and
    implement a method named `compute` with the following signature:

    .. code-block:: python

        def compute(self, today, assets, out, *inputs):
           ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFactor constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[self.dtype, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    ``compute`` functions should expect to be passed NaN values for dates on
    which no data was available for an asset.  This may include dates on which
    an asset did not yet exist.

    For example, if a CustomFactor requires 10 rows of close price data, and
    asset A started trading on Monday June 2nd, 2014, then on Tuesday, June
    3rd, 2014, the column of input data for asset A will have 9 leading NaNs
    for the preceding days on which data was not yet available.

    Examples
    --------

    A CustomFactor with pre-declared defaults:

    .. code-block:: python

        class TenDayRange(CustomFactor):
            """
            Computes the difference between the highest high in the last 10
            days and the lowest low.

            Pre-declares high and low as default inputs and `window_length` as
            10.
            """

            inputs = [USEquityPricing.high, USEquityPricing.low]
            window_length = 10

            def compute(self, today, assets, out, highs, lows):
                from numpy import nanmin, nanmax

                highest_highs = nanmax(highs, axis=0)
                lowest_lows = nanmin(lows, axis=0)
                out[:] = highest_highs - lowest_lows


        # Doesn't require passing inputs or window_length because they're
        # pre-declared as defaults for the TenDayRange class.
        ten_day_range = TenDayRange()

    A CustomFactor without defaults:

    .. code-block:: python

        class MedianValue(CustomFactor):
            """
            Computes the median value of an arbitrary single input over an
            arbitrary window.

            Does not declare any defaults, so values for `window_length` and
            `inputs` must be passed explicitly on every construction.
            """

            def compute(self, today, assets, out, data):
                from numpy import nanmedian
                out[:] = nanmedian(data, axis=0)

        # Values for `inputs` and `window_length` must be passed explicitly to
        # MedianValue.
        median_close10 = MedianValue([USEquityPricing.close], window_length=10)
        median_low15 = MedianValue([USEquityPricing.low], window_length=15)
    '''
    # Custom factors produce float64 output unless a subclass overrides this.
    dtype = float64_dtype
    # NOTE(review): presumably the context manager used around `compute` by
    # CustomTermMixin, with `nullctx()` as a do-nothing default -- confirm
    # against the mixin.
    ctx = nullctx()
715

quantopian / zipline

Pull Request — master (#905)

zipline.pipeline.factors.function_application() B

Complexity

Size

Duplication

1 Method

Duplication Side-by-Side

Filter issues like