zipline.pipeline.factors.binary_operator() - Code Metrics - Inspection of "Merge pull request #884 from quantopian/returns-fa..." - quantopian/zipline - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( efcb01...ce3727 )

unknown

created 2015-12-02 14:34 UTC

zipline.pipeline.factors.binary_operator() C

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	7
dl	0
loc	38
rs	5.5

"""
factor.py
"""
from operator import attrgetter
from numbers import Number

from numpy import (
    apply_along_axis,
    float64,
    nan,
    inf,
)
from scipy.stats import rankdata

from zipline.errors import (
    UnknownRankMethod,
    UnsupportedDataType,
)
from zipline.lib.rank import rankdata_2d_ordinal
from zipline.pipeline.term import (
    CustomTermMixin,
    NotSpecified,
    RequiredWindowLengthMixin,
    SingleInputMixin,
    CompositeTerm,
)
from zipline.pipeline.expression import (
    BadBinaryOperator,
    COMPARISONS,
    is_comparison,
    MATH_BINOPS,
    method_name_for_op,
    NumericalExpression,
    NUMEXPR_MATH_FUNCS,
    UNARY_OPS,
)
from zipline.pipeline.filters import (
    NumExprFilter,
    PercentileFilter,
)
from zipline.utils.control_flow import nullctx


_RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal'])


def binop_return_type(op):
    """
    Pick the term class produced by applying binary operator ``op``.

    Returns NumExprFilter when ``is_comparison(op)`` is true, and
    NumExprFactor for every other operator.
    """
    return NumExprFilter if is_comparison(op) else NumExprFactor


def binary_operator(op):
    """
    Build a left-binding binary operator method for Factor subclasses.

    Parameters
    ----------
    op : str
        The operator symbol to implement (interpolated verbatim into the
        generated expression strings).

    Returns
    -------
    binary_operator : function
        A two-argument function ``(self, other)`` suitable for use as a
        method such as ``__add__``.
    """
    # Looks up the reflected (commuted) method for ``op`` on another object.
    # Used when the right-hand operand is a NumExprFactor, so that its own
    # input-merging logic handles the combination.
    get_commuted_method = attrgetter(method_name_for_op(op, commute=True))

    def binary_operator(self, other):
        # Resolved on every call rather than once in the factory: the classes
        # returned by binop_return_type do not exist yet when the factory
        # runs inside the class body of Factor.
        return_type = binop_return_type(op)

        # Left operand already wraps an expression: let it merge ``other``.
        if isinstance(self, NumExprFactor):
            self_expr, other_expr, new_inputs = self.build_binary_op(
                op, other,
            )
            combined = "({left}) {op} ({right})".format(
                left=self_expr,
                op=op,
                right=other_expr,
            )
            return return_type(combined, new_inputs)

        # Right operand wraps an expression: delegate to its reflected
        # operator, passing ourselves as the other operand.
        if isinstance(other, NumExprFactor):
            reflected = get_commuted_method(other)
            return reflected(self)

        # Two plain Factors.  When both operands are the same object, bind it
        # once and refer to it twice.
        if isinstance(other, Factor):
            if self is other:
                return return_type("x_0 {op} x_0".format(op=op), (self,))
            return return_type("x_0 {op} x_1".format(op=op), (self, other))

        # Plain number: embed it into the expression as a literal.
        if isinstance(other, Number):
            with_constant = "x_0 {op} ({constant})".format(
                op=op,
                constant=other,
            )
            return return_type(with_constant, binds=(self,))

        raise BadBinaryOperator(op, self, other)

    binary_operator.__doc__ = "Binary Operator: '%s'" % op
    return binary_operator


def reflected_binary_operator(op):
    """
    Build a right-binding (reflected) binary operator method for a Factor.

    Parameters
    ----------
    op : str
        The operator symbol to implement.  Must not be a comparison.

    Returns
    -------
    reflected_binary_operator : function
        A two-argument function ``(self, other)`` suitable for use as a
        method such as ``__radd__``.  ``self`` is the right-hand operand of
        the original expression and ``other`` the left-hand one.
    """
    assert not is_comparison(op)

    def reflected_binary_operator(self, other):
        if isinstance(self, NumericalExpression):
            self_expr, other_expr, new_inputs = self.build_binary_op(
                op, other
            )
            # We are the right-hand operand, so ``other`` goes on the left.
            swapped = "({left}) {op} ({right})".format(
                left=other_expr,
                right=self_expr,
                op=op,
            )
            return NumExprFactor(swapped, new_inputs)

        # Numbers are the only remaining case to handle here: for every other
        # valid operand the corresponding left-binding method is called
        # instead of this one.
        if isinstance(other, Number):
            constant_first = "{constant} {op} x_0".format(
                op=op,
                constant=other,
            )
            return NumExprFactor(constant_first, binds=(self,))

        raise BadBinaryOperator(op, other, self)
    return reflected_binary_operator


def unary_operator(op):
    """
    Build a unary operator method for Factors.

    Parameters
    ----------
    op : str
        The operator symbol.  Only '-' is accepted.

    Returns
    -------
    unary_operator : function
        A one-argument function ``(self)`` suitable for use as a method such
        as ``__neg__``.

    Raises
    ------
    ValueError
        If ``op`` is not a supported unary operator.
    """
    # Negation is the only operator currently supported for all of our
    # possible input types.
    if op not in {'-'}:
        raise ValueError("Invalid unary operator %s." % op)

    def unary_operator(self):
        # NumExprFactor is referenced at call time: it is not defined yet
        # when this factory runs.
        if not isinstance(self, NumericalExpression):
            # Plain term: bind it as the sole input of a new expression.
            return NumExprFactor("{op}x_0".format(op=op), (self,))

        # Already an expression: wrap the existing expression string and
        # reuse its inputs.
        wrapped = "{op}({expr})".format(op=op, expr=self._expr)
        return NumExprFactor(wrapped, self.inputs)

    unary_operator.__doc__ = "Unary Operator: '%s'" % op
    return unary_operator


def function_application(func):
    """
    Build a method that applies a numexpr math function to a Factor.

    Parameters
    ----------
    func : str
        Name of the function to apply.  Must be a member of
        NUMEXPR_MATH_FUNCS.

    Returns
    -------
    mathfunc : function
        A one-argument function ``(self)`` returning a NumExprFactor.  Its
        ``__name__`` is set to ``func`` and it carries a docstring, so the
        generated methods are distinguishable in help(), reprs and
        tracebacks.

    Raises
    ------
    ValueError
        If ``func`` is not in NUMEXPR_MATH_FUNCS.
    """
    if func not in NUMEXPR_MATH_FUNCS:
        raise ValueError("Unsupported mathematical function '%s'" % func)

    def mathfunc(self):
        if isinstance(self, NumericalExpression):
            # Already an expression: wrap the existing expression string and
            # reuse its inputs.
            return NumExprFactor(
                "{func}({expr})".format(func=func, expr=self._expr),
                self.inputs,
            )
        else:
            # Plain term: bind it as the sole input of a new expression.
            return NumExprFactor("{func}(x_0)".format(func=func), (self,))

    # Without this, every generated method would be called 'mathfunc' and
    # have no docstring, unlike the operator factories in this module.
    # str() keeps the assignment valid on Python 2 if a unicode name is
    # passed.
    mathfunc.__name__ = str(func)
    mathfunc.__doc__ = "Function Application: '%s'" % func
    return mathfunc


class Factor(CompositeTerm):
    """
    Pipeline API expression producing numerically-valued outputs.

    Operator methods and math-function methods are generated when the class
    body executes, by inserting the results of the factory functions
    ``binary_operator``, ``reflected_binary_operator``, ``unary_operator``
    and ``function_application`` into the class namespace.
    """
    dtype = float64

    # Dynamically add functions for creating NumExprFactor/NumExprFilter
    # instances.
    # NOTE: this relies on ``locals()`` in a class body being the namespace
    # that becomes the class dict, so entries written into ``clsdict`` end up
    # as attributes of Factor.  The statements below are order-dependent.
    clsdict = locals()
    # Left-binding operators: all math operators plus every comparison except
    # '=='.
    clsdict.update(
        {
            method_name_for_op(op): binary_operator(op)
            # Don't override __eq__ because it breaks comparisons on tuples of
            # Factors.
            for op in MATH_BINOPS.union(COMPARISONS - {'=='})
        }
    )
    # Reflected operators (math operators only).
    clsdict.update(
        {
            method_name_for_op(op, commute=True): reflected_binary_operator(op)
            for op in MATH_BINOPS
        }
    )
    # Unary negation.  Every op in UNARY_OPS is written to the same
    # '__neg__' key, and unary_operator rejects anything other than '-'.
    clsdict.update(
        {
            '__neg__': unary_operator(op)
            for op in UNARY_OPS
        }
    )
    # One method per supported numexpr math function, named after it.
    clsdict.update(
        {
            funcname: function_application(funcname)
            for funcname in NUMEXPR_MATH_FUNCS
        }
    )

    # Reuse the generated division methods under the true-division names.
    __truediv__ = clsdict['__div__']
    __rtruediv__ = clsdict['__rdiv__']

    # Equality is exposed as a named method because __eq__ is deliberately
    # not overridden above.
    eq = binary_operator('==')

    def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
        """
        Construct a new Factor representing the sorted rank of each column
        within each row.

        Parameters
        ----------
        method : str, {'ordinal', 'min', 'max', 'dense', 'average'}
            The method used to assign ranks to tied elements. See
            `scipy.stats.rankdata` for a full description of the semantics for
            each ranking method. Default is 'ordinal'.
        ascending : bool, optional
            Whether to return sorted rank in ascending or descending order.
            Default is True.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, ranks are computed ignoring any asset/date
            pairs for which `mask` produces a value of False.

        Returns
        -------
        ranks : zipline.pipeline.factors.Rank
            A new factor that will compute the ranking of the data produced by
            `self`.

        Notes
        -----
        The default value for `method` is different from the default for
        `scipy.stats.rankdata`.  See that function's documentation for a full
        description of the valid inputs to `method`.

        Missing or non-existent data on a given day will cause an asset to be
        given a rank of NaN for that day.

        See Also
        --------
        scipy.stats.rankdata
        zipline.lib.rank
        zipline.pipeline.factors.Rank
        """
        # Descending order is implemented by ranking the negated factor.
        return Rank(self if ascending else -self, method=method, mask=mask)

    def top(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the top N asset values of self each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, top values are computed ignoring any
            asset/date pairs for which `mask` produces a value of False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        # Uses the default (ordinal) descending rank and keeps ranks <= N.
        return self.rank(ascending=False, mask=mask) <= N

    def bottom(self, N, mask=NotSpecified):
        """
        Construct a Filter matching the bottom N asset values of self each day.

        Parameters
        ----------
        N : int
            Number of assets passing the returned filter each day.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing ranks.
            If mask is supplied, bottom values are computed ignoring any
            asset/date pairs for which `mask` produces a value of False.

        Returns
        -------
        filter : zipline.pipeline.filters.Filter
        """
        # Uses the default (ordinal) ascending rank and keeps ranks <= N.
        return self.rank(ascending=True, mask=mask) <= N

    def percentile_between(self,
                           min_percentile,
                           max_percentile,
                           mask=NotSpecified):
        """
        Construct a new Filter representing entries from the output of this
        Factor that fall within the percentile range defined by min_percentile
        and max_percentile.

        Parameters
        ----------
        min_percentile : float [0.0, 100.0]
            Return True for assets falling above this percentile in the data.
        max_percentile : float [0.0, 100.0]
            Return True for assets falling below this percentile in the data.
        mask : zipline.pipeline.Filter, optional
            A Filter representing assets to consider when computing percentile
            thresholds.  If mask is supplied, percentile cutoffs are computed
            each day using only assets for which `mask` returns True, and
            assets not passing `mask` will produce False in the output of this
            filter as well.

        Returns
        -------
        out : zipline.pipeline.filters.PercentileFilter
            A new filter that will compute the specified percentile-range mask.

        See Also
        --------
        zipline.pipeline.filters.PercentileFilter
        """
        return PercentileFilter(
            self,
            min_percentile=min_percentile,
            max_percentile=max_percentile,
            mask=mask,
        )

    def isnan(self):
        """
        A Filter producing True for all values where this Factor is NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        # NaN is the only floating-point value that compares unequal to
        # itself.
        return self != self

    def notnan(self):
        """
        A Filter producing True for values where this Factor is not NaN.

        Returns
        -------
        nanfilter : zipline.pipeline.filters.Filter
        """
        return ~self.isnan()

    def isfinite(self):
        """
        A Filter producing True for values where this Factor is anything but
        NaN, inf, or -inf.

        Returns
        -------
        finitefilter : zipline.pipeline.filters.Filter
        """
        # Both bounds are strict, so +/-inf fail one side; comparisons
        # against NaN are false, so NaN fails as well.
        return (-inf < self) & (self < inf)


class NumExprFactor(NumericalExpression, Factor):
    """
    A Factor whose values are computed from a numexpr expression.

    Parameters
    ----------
    expr : string
        A string suitable for passing to numexpr.  Every variable in `expr`
        should have the form "x_i", where i is the index of the
        corresponding factor input in `binds`.
    binds : tuple
        A tuple of factors to use as inputs.

    Notes
    -----
    Instances are normally created by applying numerical operators such as
    `+` and `-` to Factors; users should rarely need to construct a
    NumExprFactor directly.
    """


class Rank(SingleInputMixin, Factor):
    """
    A Factor representing the row-wise rank data of another Factor.

    Parameters
    ----------
    factor : zipline.pipeline.factors.Factor
        The factor on which to compute ranks.
    method : str, {'average', 'min', 'max', 'dense', 'ordinal'}
        The method used to assign ranks to tied elements.  See
        `scipy.stats.rankdata` for a full description of the semantics for each
        ranking method.
    mask : zipline.pipeline.Filter
        Filter selecting the asset/date pairs to include in the ranking.

    See Also
    --------
    scipy.stats.rankdata : Underlying ranking algorithm.
    zipline.factors.Factor.rank : Method-style interface to same functionality.

    Notes
    -----
    Most users should call Factor.rank rather than directly construct an
    instance of this class.
    """
    window_length = 0
    dtype = float64

    def __new__(cls, factor, method, mask):
        return super(Rank, cls).__new__(
            cls,
            inputs=(factor,),
            method=method,
            mask=mask,
        )

    def _init(self, method, *args, **kwargs):
        # Store the ranking method, then defer to the parent initializer.
        self._method = method
        return super(Rank, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, method, *args, **kwargs):
        # Extend the parent's identity with the ranking method so that ranks
        # computed with different methods are distinct terms.
        return (
            super(Rank, cls).static_identity(*args, **kwargs),
            method,
        )

    def _validate(self):
        """
        Verify that the stored rank method is valid.

        Raises
        ------
        UnknownRankMethod
            If the method is not one of `_RANK_METHODS`.
        """
        if self._method not in _RANK_METHODS:
            raise UnknownRankMethod(
                method=self._method,
                choices=set(_RANK_METHODS),
            )
        return super(Rank, self)._validate()

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a like-shaped array of per-row
        ranks.

        Entries that are excluded by `mask`, or that are NaN in the input,
        receive a rank of NaN.
        """
        data = arrays[0].copy()
        data[~mask] = nan

        # NaN is the only value unequal to itself, so this marks both the
        # masked-out entries set above and NaNs already present in the
        # input.  Previously only masked-out entries were reset to NaN, so
        # missing input values were silently ranked last.
        missing = data != data

        # OPTIMIZATION: Fast path the default case with our own specialized
        # Cython implementation.
        if self._method == 'ordinal':
            result = rankdata_2d_ordinal(data)
        else:
            # FUTURE OPTIMIZATION:
            # Write a less general "apply to rows" method that doesn't do all
            # the extra work that apply_along_axis does.
            # NOTE(review): recent SciPy releases give rankdata a
            # `nan_policy` argument; confirm its default behaviour for rows
            # containing NaN against the SciPy version in use.
            result = apply_along_axis(rankdata, 1, data, method=self._method)

        # rankdata can return integer ranks for methods other than
        # 'average'; an integer array cannot hold the NaNs assigned below.
        if result.dtype != float64:
            result = result.astype(float64)

        # rankdata will sort nan values into last place, but we want our
        # nans to propagate, so explicitly re-apply.
        result[missing] = nan
        return result

    def __repr__(self):
        return "{type}({input_}, method='{method}', mask={mask})".format(
            type=type(self).__name__,
            input_=self.inputs[0],
            method=self._method,
            mask=self.mask,
        )


class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    '''
    Base class for user-defined Factors.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFactor constructor, we look for a
        class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this argument is not
        passed to the CustomFactor constructor, we look for a class-level
        attribute named `window_length`.

    Notes
    -----
    Users implementing their own Factors should subclass CustomFactor and
    implement a method named `compute` with the following signature:

    .. code-block:: python

        def compute(self, today, assets, out, *inputs):
           ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFactor constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[float64, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    ``compute`` functions should expect to be passed NaN values for dates on
    which no data was available for an asset.  This may include dates on which
    an asset did not yet exist.

    For example, if a CustomFactor requires 10 rows of close price data, and
    asset A started trading on Monday June 2nd, 2014, then on Tuesday, June
    3rd, 2014, the column of input data for asset A will have 9 leading NaNs
    for the preceding days on which data was not yet available.

    Examples
    --------

    A CustomFactor with pre-declared defaults:

    .. code-block:: python

        class TenDayRange(CustomFactor):
            """
            Computes the difference between the highest high in the last 10
            days and the lowest low.

            Pre-declares high and low as default inputs and `window_length` as
            10.
            """

            inputs = [USEquityPricing.high, USEquityPricing.low]
            window_length = 10

            def compute(self, today, assets, out, highs, lows):
                from numpy import nanmin, nanmax

                highest_highs = nanmax(highs, axis=0)
                lowest_lows = nanmin(lows, axis=0)
                out[:] = highest_highs - lowest_lows


        # Doesn't require passing inputs or window_length because they're
        # pre-declared as defaults for the TenDayRange class.
        ten_day_range = TenDayRange()

    A CustomFactor without defaults:

    .. code-block:: python

        class MedianValue(CustomFactor):
            """
            Computes the median value of an arbitrary single input over an
            arbitrary window.

            Does not declare any defaults, so values for `window_length` and
            `inputs` must be passed explicitly on every construction.
            """

            def compute(self, today, assets, out, data):
                from numpy import nanmedian
                out[:] = nanmedian(data, axis=0)

        # Values for `inputs` and `window_length` must be passed explicitly to
        # MedianValue.
        median_close10 = MedianValue([USEquityPricing.close], window_length=10)
        median_low15 = MedianValue([USEquityPricing.low], window_length=15)
    '''
    ctx = nullctx()

    def _validate(self):
        # Only float64 outputs are accepted; anything else is rejected before
        # the parent validation runs.
        has_unsupported_dtype = self.dtype != float64
        if has_unsupported_dtype:
            raise UnsupportedDataType(dtype=self.dtype)
        return super(CustomFactor, self)._validate()


1			"""
2			factor.py
3			"""
4			from operator import attrgetter
5			from numbers import Number
6
7			from numpy import (
8			apply_along_axis,
9			float64,
10			nan,
11			inf,
12			)
13			from scipy.stats import rankdata
14
15			from zipline.errors import (
16			UnknownRankMethod,
17			UnsupportedDataType,
18			)
19			from zipline.lib.rank import rankdata_2d_ordinal
20			from zipline.pipeline.term import (
21			CustomTermMixin,
22			NotSpecified,
23			RequiredWindowLengthMixin,
24			SingleInputMixin,
25			CompositeTerm,
26			)
27			from zipline.pipeline.expression import (
28			BadBinaryOperator,
29			COMPARISONS,
30			is_comparison,
31			MATH_BINOPS,
32			method_name_for_op,
33			NumericalExpression,
34			NUMEXPR_MATH_FUNCS,
35			UNARY_OPS,
36			)
37			from zipline.pipeline.filters import (
38			NumExprFilter,
39			PercentileFilter,
40			)
41			from zipline.utils.control_flow import nullctx
42
43
44			_RANK_METHODS = frozenset(['average', 'min', 'max', 'dense', 'ordinal'])
45
46
47			def binop_return_type(op):
48			if is_comparison(op):
49			return NumExprFilter
50			else:
51			return NumExprFactor
52
53
54			def binary_operator(op):
55			"""
56			Factory function for making binary operator methods on a Factor subclass.
57
58			Returns a function, "binary_operator" suitable for implementing functions
59			like __add__.
60			"""
61			# When combining a Factor with a NumericalExpression, we use this
62			# attrgetter instance to defer to the commuted implementation of the
63			# NumericalExpression operator.
64			commuted_method_getter = attrgetter(method_name_for_op(op, commute=True))
65
66			def binary_operator(self, other):
67			# This can't be hoisted up a scope because the types returned by
68			# binop_return_type aren't defined when the top-level function is
69			# invoked in the class body of Factor.
70			return_type = binop_return_type(op)
71			if isinstance(self, NumExprFactor):
			0 ignored issues – show Duplication introduced 2015-11-21 01:04 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
72			self_expr, other_expr, new_inputs = self.build_binary_op(
73			op, other,
74			)
75			return return_type(
76			"({left}) {op} ({right})".format(
77			left=self_expr,
78			op=op,
79			right=other_expr,
80			),
81			new_inputs,
82			)
83			elif isinstance(other, NumExprFactor):
84			# NumericalExpression overrides ops to correctly handle merging of
85			# inputs. Look up and call the appropriate reflected operator with
86			# ourself as the input.
87			return commuted_method_getter(other)(self)
88			elif isinstance(other, Factor):
89			if self is other:
90			return return_type(
91			"x_0 {op} x_0".format(op=op),
92			(self,),
93			)
94			return return_type(
95			"x_0 {op} x_1".format(op=op),
96			(self, other),
97			)
98			elif isinstance(other, Number):
99			return return_type(
100			"x_0 {op} ({constant})".format(op=op, constant=other),
101			binds=(self,),
102			)
103			raise BadBinaryOperator(op, self, other)
104
105			binary_operator.__doc__ = "Binary Operator: '%s'" % op
106			return binary_operator
107
108
109			def reflected_binary_operator(op):
110			"""
111			Factory function for making binary operator methods on a Factor.
112
113			Returns a function, "reflected_binary_operator" suitable for implementing
114			functions like __radd__.
115			"""
116			assert not is_comparison(op)
117
118			def reflected_binary_operator(self, other):
119
120			if isinstance(self, NumericalExpression):
121			self_expr, other_expr, new_inputs = self.build_binary_op(
122			op, other
123			)
124			return NumExprFactor(
125			"({left}) {op} ({right})".format(
126			left=other_expr,
127			right=self_expr,
128			op=op,
129			),
130			new_inputs,
131			)
132
133			# Only have to handle the numeric case because in all other valid cases
134			# the corresponding left-binding method will be called.
135			elif isinstance(other, Number):
136			return NumExprFactor(
137			"{constant} {op} x_0".format(op=op, constant=other),
138			binds=(self,),
139			)
140			raise BadBinaryOperator(op, other, self)
141			return reflected_binary_operator
142
143
144			def unary_operator(op):
			0 ignored issues – show Duplication introduced 2015-11-21 01:04 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
145			"""
146			Factory function for making unary operator methods for Factors.
147			"""
148			# Only negate is currently supported for all our possible input types.
149			valid_ops = {'-'}
150			if op not in valid_ops:
151			raise ValueError("Invalid unary operator %s." % op)
152
153			def unary_operator(self):
154			# This can't be hoisted up a scope because the types returned by
155			# unary_op_return_type aren't defined when the top-level function is
156			# invoked.
157			if isinstance(self, NumericalExpression):
158			return NumExprFactor(
159			"{op}({expr})".format(op=op, expr=self._expr),
160			self.inputs,
161			)
162			else:
163			return NumExprFactor("{op}x_0".format(op=op), (self,))
164
165			unary_operator.__doc__ = "Unary Operator: '%s'" % op
166			return unary_operator
167
168
169			def function_application(func):
170			"""
171			Factory function for producing function application methods for Factor
172			subclasses.
173			"""
174			if func not in NUMEXPR_MATH_FUNCS:
175			raise ValueError("Unsupported mathematical function '%s'" % func)
176
177			def mathfunc(self):
178			if isinstance(self, NumericalExpression):
179			return NumExprFactor(
180			"{func}({expr})".format(func=func, expr=self._expr),
181			self.inputs,
182			)
183			else:
184			return NumExprFactor("{func}(x_0)".format(func=func), (self,))
185			return mathfunc
186
187
188			class Factor(CompositeTerm):
189			"""
190			Pipeline API expression producing numerically-valued outputs.
191			"""
192			dtype = float64
193
194			# Dynamically add functions for creating NumExprFactor/NumExprFilter
195			# instances.
196			clsdict = locals()
197			clsdict.update(
198			{
199			method_name_for_op(op): binary_operator(op)
200			# Don't override __eq__ because it breaks comparisons on tuples of
201			# Factors.
202			for op in MATH_BINOPS.union(COMPARISONS - {'=='})
203			}
204			)
205			clsdict.update(
206			{
207			method_name_for_op(op, commute=True): reflected_binary_operator(op)
208			for op in MATH_BINOPS
209			}
210			)
211			clsdict.update(
212			{
213			'__neg__': unary_operator(op)
214			for op in UNARY_OPS
215			}
216			)
217			clsdict.update(
218			{
219			funcname: function_application(funcname)
220			for funcname in NUMEXPR_MATH_FUNCS
221			}
222			)
223
224			__truediv__ = clsdict['__div__']
225			__rtruediv__ = clsdict['__rdiv__']
226
227			eq = binary_operator('==')
228
229			def rank(self, method='ordinal', ascending=True, mask=NotSpecified):
230			"""
231			Construct a new Factor representing the sorted rank of each column
232			within each row.
233
234			Parameters
235			----------
236			method : str, {'ordinal', 'min', 'max', 'dense', 'average'}
237			The method used to assign ranks to tied elements. See
238			`scipy.stats.rankdata` for a full description of the semantics for
239			each ranking method. Default is 'ordinal'.
240			ascending : bool, optional
241			Whether to return sorted rank in ascending or descending order.
242			Default is True.
243			mask : zipline.pipeline.Filter, optional
244			A Filter representing assets to consider when computing ranks.
245			If mask is supplied, ranks are computed ignoring any asset/date
246			pairs for which `mask` produces a value of False.
247
248			Returns
249			-------
250			ranks : zipline.pipeline.factors.Rank
251			A new factor that will compute the ranking of the data produced by
252			`self`.
253
254			Notes
255			-----
256			The default value for `method` is different from the default for
257			`scipy.stats.rankdata`. See that function's documentation for a full
258			description of the valid inputs to `method`.
259
260			Missing or non-existent data on a given day will cause an asset to be
261			given a rank of NaN for that day.
262
263			See Also
264			--------
265			scipy.stats.rankdata
266			zipline.lib.rank
267			zipline.pipeline.factors.Rank
268			"""
269			return Rank(self if ascending else -self, method=method, mask=mask)
270
271			def top(self, N, mask=NotSpecified):
272			"""
273			Construct a Filter matching the top N asset values of self each day.
274
275			Parameters
276			----------
277			N : int
278			Number of assets passing the returned filter each day.
279			mask : zipline.pipeline.Filter, optional
280			A Filter representing assets to consider when computing ranks.
281			If mask is supplied, top values are computed ignoring any
282			asset/date pairs for which `mask` produces a value of False.
283
284			Returns
285			-------
286			filter : zipline.pipeline.filters.Filter
287			"""
288			return self.rank(ascending=False, mask=mask) <= N
289
290			def bottom(self, N, mask=NotSpecified):
291			"""
292			Construct a Filter matching the bottom N asset values of self each day.
293
294			Parameters
295			----------
296			N : int
297			Number of assets passing the returned filter each day.
298			mask : zipline.pipeline.Filter, optional
299			A Filter representing assets to consider when computing ranks.
300			If mask is supplied, bottom values are computed ignoring any
301			asset/date pairs for which `mask` produces a value of False.
302
303			Returns
304			-------
305			filter : zipline.pipeline.Filter
306			"""
307			return self.rank(ascending=True, mask=mask) <= N
308
309			def percentile_between(self,
310			min_percentile,
311			max_percentile,
312			mask=NotSpecified):
313			"""
314			Construct a new Filter representing entries from the output of this
315			Factor that fall within the percentile range defined by min_percentile
316			and max_percentile.
317
318			Parameters
319			----------
320			min_percentile : float [0.0, 100.0]
321			Return True for assets falling above this percentile in the data.
322			max_percentile : float [0.0, 100.0]
323			Return True for assets falling below this percentile in the data.
324			mask : zipline.pipeline.Filter, optional
325			A Filter representing assets to consider when percentile
326			thresholds. If mask is supplied, percentile cutoffs are computed
327			each day using only assets for which `mask` returns True, and
328			assets not passing `mask` will produce False in the output of this
329			filter as well.
330
331			Returns
332			-------
333			out : zipline.pipeline.filters.PercentileFilter
334			A new filter that will compute the specified percentile-range mask.
335
336			See Also
337			--------
338			zipline.pipeline.filters.PercentileFilter
339			"""
340			return PercentileFilter(
341			self,
342			min_percentile=min_percentile,
343			max_percentile=max_percentile,
344			mask=mask,
345			)
346
347			def isnan(self):
348			"""
349			A Filter producing True for all values where this Factor is NaN.
350			"""
351			return self != self
352
353			def notnan(self):
354			"""
355			A Filter producing True for values where this Factor is not NaN.
356
357			Returns
358			-------
359			nanfilter : zipline.pipeline.filters.Filter
360			"""
361			return ~self.isnan()
362
363			def isfinite(self):
364			"""
365			A Filter producing True for values where this Factor is anything but
366			NaN, inf, or -inf.
367			"""
368			return (-inf < self) & (self < inf)
369
370
class NumExprFactor(NumericalExpression, Factor):
    """
    A Factor whose values are computed by evaluating a numexpr expression.

    Parameters
    ----------
    expr : string
        An expression string suitable for passing to numexpr.  Every
        variable in 'expr' should have the form "x_i", where i is the index
        of the corresponding factor input in 'binds'.
    binds : tuple
        The factors to use as inputs to the expression.

    Notes
    -----
    Instances are normally created by applying numerical operators such as
    `+` and `-` to Factors.  Users should rarely need to construct a
    NumExprFactor directly.
    """
390
391
class Rank(SingleInputMixin, Factor):
    """
    A Factor representing the row-wise rank data of another Factor.

    Parameters
    ----------
    factor : zipline.pipeline.factors.Factor
        The factor on which to compute ranks.
    method : str, {'average', 'min', 'max', 'dense', 'ordinal'}
        The method used to assign ranks to tied elements.  See
        `scipy.stats.rankdata` for a full description of the semantics for
        each ranking method.
    mask : zipline.pipeline.Filter
        Entries for which `mask` is False are excluded from ranking and
        produce NaN in the output.

    See Also
    --------
    scipy.stats.rankdata : Underlying ranking algorithm.
    zipline.pipeline.factors.Factor.rank : Method-style interface to same
        functionality.

    Notes
    -----
    Most users should call Factor.rank rather than directly construct an
    instance of this class.
    """
    window_length = 0
    dtype = float64

    def __new__(cls, factor, method, mask):
        # The ranked factor becomes the single input of this term; `method`
        # and `mask` are forwarded by keyword to the parent constructor.
        return super(Rank, cls).__new__(
            cls,
            inputs=(factor,),
            method=method,
            mask=mask,
        )

    def _init(self, method, *args, **kwargs):
        """
        Store the ranking method, then defer the remaining arguments to the
        parent class's ``_init``.
        """
        self._method = method
        return super(Rank, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, method, *args, **kwargs):
        """
        Return the parent class's static identity paired with `method`, so
        that ranks computed with different methods have distinct identities.
        """
        return (
            super(Rank, cls).static_identity(*args, **kwargs),
            method,
        )

    def _validate(self):
        """
        Verify that the stored rank method is valid.

        Raises
        ------
        UnknownRankMethod
            If the stored method is not one of `_RANK_METHODS`.
        """
        if self._method not in _RANK_METHODS:
            raise UnknownRankMethod(
                method=self._method,
                choices=set(_RANK_METHODS),
            )
        return super(Rank, self)._validate()

    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a like-shaped array of per-row
        ranks.

        Entries where `mask` is False are set to NaN both before and after
        ranking.
        """
        inv_mask = ~mask
        # Work on a copy so the caller's input array is not mutated.
        data = arrays[0].copy()
        data[inv_mask] = nan
        # OPTIMIZATION: Fast path the default case with our own specialized
        # Cython implementation.
        if self._method == 'ordinal':
            result = rankdata_2d_ordinal(data)
        else:
            # FUTURE OPTIMIZATION:
            # Write a less general "apply to rows" method that doesn't do all
            # the extra work that apply_along_axis does.
            result = apply_along_axis(rankdata, 1, data, method=self._method)

        # rankdata will sort nan values into last place, but we want our
        # nans to propagate, so explicitly re-apply.
        result[inv_mask] = nan
        return result

    def __repr__(self):
        return "{type}({input_}, method='{method}', mask={mask})".format(
            type=type(self).__name__,
            input_=self.inputs[0],
            method=self._method,
            mask=self.mask,
        )
478
479
class CustomFactor(RequiredWindowLengthMixin, CustomTermMixin, Factor):
    '''
    Base class for user-defined Factors.

    Parameters
    ----------
    inputs : iterable, optional
        An iterable of `BoundColumn` instances (e.g. USEquityPricing.close),
        describing the data to load and pass to `self.compute`.  If this
        argument is not passed to the CustomFactor constructor, we look for
        a class-level attribute named `inputs`.
    window_length : int, optional
        Number of rows to pass for each input.  If this argument is not
        passed to the CustomFactor constructor, we look for a class-level
        attribute named `window_length`.

    Notes
    -----
    Users implementing their own Factors should subclass CustomFactor and
    implement a method named `compute` with the following signature:

    .. code-block:: python

       def compute(self, today, assets, out, *inputs):
           ...

    On each simulation date, ``compute`` will be called with the current date,
    an array of sids, an output array, and an input array for each expression
    passed as inputs to the CustomFactor constructor.

    The specific types of the values passed to `compute` are as follows::

        today : np.datetime64[ns]
            Row label for the last row of all arrays passed as `inputs`.
        assets : np.array[int64, ndim=1]
            Column labels for `out` and `inputs`.
        out : np.array[float64, ndim=1]
            Output array of the same shape as `assets`.  `compute` should write
            its desired return values into `out`.
        *inputs : tuple of np.array
            Raw data arrays corresponding to the values of `self.inputs`.

    ``compute`` functions should expect to be passed NaN values for dates on
    which no data was available for an asset.  This may include dates on which
    an asset did not yet exist.

    For example, if a CustomFactor requires 10 rows of close price data, and
    asset A started trading on Monday June 2nd, 2014, then on Tuesday, June
    3rd, 2014, the column of input data for asset A will have 9 leading NaNs
    for the preceding days on which data was not yet available.

    Examples
    --------

    A CustomFactor with pre-declared defaults:

    .. code-block:: python

        class TenDayRange(CustomFactor):
            """
            Computes the difference between the highest high in the last 10
            days and the lowest low.

            Pre-declares high and low as default inputs and `window_length` as
            10.
            """

            inputs = [USEquityPricing.high, USEquityPricing.low]
            window_length = 10

            def compute(self, today, assets, out, highs, lows):
                from numpy import nanmin, nanmax

                highest_highs = nanmax(highs, axis=0)
                lowest_lows = nanmin(lows, axis=0)
                out[:] = highest_highs - lowest_lows


        # Doesn't require passing inputs or window_length because they're
        # pre-declared as defaults for the TenDayRange class.
        ten_day_range = TenDayRange()

    A CustomFactor without defaults:

    .. code-block:: python

        class MedianValue(CustomFactor):
            """
            Computes the median value of an arbitrary single input over an
            arbitrary window.

            Does not declare any defaults, so values for `window_length` and
            `inputs` must be passed explicitly on every construction.
            """

            def compute(self, today, assets, out, data):
                from numpy import nanmedian
                out[:] = nanmedian(data, axis=0)

        # Values for `inputs` and `window_length` must be passed explicitly to
        # MedianValue.
        median_close10 = MedianValue([USEquityPricing.close], window_length=10)
        median_low15 = MedianValue([USEquityPricing.low], window_length=15)
    '''
    # Class-level context object created by the project's nullctx helper.
    ctx = nullctx()

    def _validate(self):
        """
        Verify that this factor's dtype is float64, then run the parent
        class's validation.

        Raises
        ------
        UnsupportedDataType
            If `self.dtype` is anything other than float64.
        """
        if self.dtype != float64:
            raise UnsupportedDataType(dtype=self.dtype)
        return super(CustomFactor, self)._validate()
590

quantopian / zipline

Push — master ( efcb01...ce3727 )

zipline.pipeline.factors.binary_operator() C

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like