Completed
Push — master ( 1f137d...7a6ba4 )
by Joe
01:31
created

zipline.pipeline.data._BoundColumnDescr   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 14
Duplicated Lines 0 %
Metric Value
dl 0
loc 14
rs 10
wmc 2

2 Methods

Rating   Name   Duplication   Size   Complexity  
A __get__() 0 5 1
A __init__() 0 3 1
1
"""
2
dataset.py
3
"""
4
from functools import total_ordering
5
from six import (
6
    iteritems,
7
    with_metaclass,
8
)
9
10
from zipline.pipeline.term import Term, AssetExists
11
from zipline.utils.input_validation import ensure_dtype
12
from zipline.utils.preprocess import preprocess
13
14
15
class Column(object):
    """
    An abstract column of data, not yet associated with a dataset.

    A `Column` only records a dtype.  It acquires a name (and, later, an
    owning dataset) through `bind`.
    """

    # `preprocess` routes the `dtype` argument through `ensure_dtype` before
    # the body runs -- presumably validating/coercing it to a numpy dtype;
    # confirm against zipline.utils.input_validation.
    @preprocess(dtype=ensure_dtype)
    def __init__(self, dtype):
        self.dtype = dtype

    def bind(self, name):
        """
        Bind a `Column` object to its name.

        Parameters
        ----------
        name : str
            The attribute name under which this column was declared.

        Returns
        -------
        _BoundColumnDescr
            A descriptor carrying this column's dtype together with `name`.
        """
        return _BoundColumnDescr(dtype=self.dtype, name=name)
29
30
31
class _BoundColumnDescr(object):
32
    """
33
    Intermediate class that sits on `DataSet` objects and returns memoized
34
    `BoundColumn` objects when requested.
35
    """
36
    def __init__(self, dtype, name):
37
        self.dtype = dtype
38
        self.name = name
39
40
    def __get__(self, instance, owner):
41
        return BoundColumn(
42
            dtype=self.dtype,
43
            dataset=owner,
44
            name=self.name,
45
        )
46
47
48
class BoundColumn(Term):
    """
    A Column of data that's been concretely bound to a particular dataset.

    Instances carry the owning dataset and the column's name in addition to
    whatever state `Term` itself manages.
    """
    # Class-level defaults for this term: an `AssetExists()` mask, no extra
    # input rows, and no inputs.
    mask = AssetExists()
    extra_input_rows = 0
    inputs = ()

    def __new__(cls, dtype, dataset, name):
        """
        Construct a column via `Term.__new__`, forwarding the dataset's
        `domain` alongside the dtype, dataset, and name.
        """
        return super(BoundColumn, cls).__new__(
            cls,
            domain=dataset.domain,
            dtype=dtype,
            dataset=dataset,
            name=name,
        )

    def _init(self, dataset, name, *args, **kwargs):
        """
        Record `dataset` and `name` on the instance, then delegate the
        remaining arguments to the parent `_init`.
        """
        self._dataset = dataset
        self._name = name
        return super(BoundColumn, self)._init(*args, **kwargs)

    @classmethod
    def static_identity(cls, dataset, name, *args, **kwargs):
        """
        Return the identity tuple for a column: the parent's static identity
        for the remaining arguments, followed by `dataset` and `name`.
        """
        return (
            super(BoundColumn, cls).static_identity(*args, **kwargs),
            dataset,
            name,
        )

    @property
    def dataset(self):
        """
        The dataset to which this column is bound.
        """
        return self._dataset

    @property
    def name(self):
        """
        The name of this column.
        """
        return self._name

    @property
    def qualname(self):
        """
        Fully qualified name of this column.

        Formed as ``<dataset class name>.<column name>``.
        """
        return '.'.join([self.dataset.__name__, self.name])

    @property
    def latest(self):
        """
        A `Latest` factor taking this column as its only input and sharing
        its dtype.
        """
        # Imported at call time rather than at module level -- presumably to
        # avoid a circular import with zipline.pipeline.factors.
        from zipline.pipeline.factors import Latest
        return Latest(inputs=(self,), dtype=self.dtype)

    def __repr__(self):
        return "{qualname}::{dtype}".format(
            qualname=self.qualname,
            dtype=self.dtype.name,
        )

    def short_repr(self):
        """
        Short representation of this column: just its qualified name.
        """
        return self.qualname
106
107
108
@total_ordering
class DataSetMeta(type):
    """
    Metaclass for DataSets

    Supplies name and dataset information to Column attributes.

    `total_ordering` derives the remaining rich comparisons from `__lt__`
    below together with the equality inherited from `type`.
    """

    def __new__(mcls, name, bases, dict_):
        """
        Create the dataset class, replacing each `Column` found in the class
        body with a bound column descriptor and recording the full set of
        column names (inherited and newly declared) in `_column_names`.
        """
        newtype = super(DataSetMeta, mcls).__new__(mcls, name, bases, dict_)
        # collect all of the column names that we inherit from our parents;
        # bases without `_column_names` contribute nothing.
        column_names = set().union(
            *(getattr(base, '_column_names', ()) for base in bases)
        )
        for maybe_colname, maybe_column in iteritems(dict_):
            if isinstance(maybe_column, Column):
                # add column names defined on our class
                bound_column_descr = maybe_column.bind(maybe_colname)
                setattr(newtype, maybe_colname, bound_column_descr)
                column_names.add(maybe_colname)

        newtype._column_names = frozenset(column_names)
        return newtype

    @property
    def columns(self):
        """
        A frozenset of the attribute values for every recorded column name,
        looked up on this dataset class.
        """
        return frozenset(
            getattr(self, colname) for colname in self._column_names
        )

    def __lt__(self, other):
        # Ordering is by object identity: arbitrary, with no semantic meaning.
        return id(self) < id(other)

    def __repr__(self):
        return '<DataSet: %r>' % self.__name__
143
144
145
class DataSet(with_metaclass(DataSetMeta, object)):
    """
    Base class for datasets; created through `DataSetMeta`, which binds any
    `Column` attributes declared in the class body.
    """
    # Read by `BoundColumn.__new__` as `dataset.domain`; defaults to None.
    domain = None
147