|
1
|
|
|
from green_magic.data.backend.engine_specs import EngineTabularRetriever, EngineTabularIterator, EngineTabularMutator |
|
2
|
|
|
from green_magic.data.backend.engine import DataEngine |
|
3
|
|
|
import pandas as pd |
|
4
|
|
|
|
|
5
|
|
|
__all__ = ['PDTabularRetriever', 'PDTabularIterator', 'PDTabularMutator'] |
|
6
|
|
|
|
|
7
|
|
|
|
|
8
|
|
|
@EngineTabularRetriever.register_as_subclass('pd')
class PDTabularRetriever(EngineTabularRetriever):
    """Retrieve columns, rows and size information from pandas-backed data.

    The observations object is the same as the one you return from 'from_json_lines'.
    Every method reads the ``observations`` attribute of the ``data`` argument;
    the pandas calls used here assume that attribute is a ``pandas.DataFrame``.
    """

    def column(self, identifier, data):
        """Return the column of ``data.observations`` selected by ``identifier``."""
        return data.observations[identifier]

    def row(self, identifier, data):
        """Return the row of ``data.observations`` whose index label is ``identifier``.

        Raises:
            KeyError: if ``identifier`` is not a label of the index (raised by pandas).
        """
        # `.loc` is an indexer and has to be subscripted. Calling it, as in
        # `.loc(identifier)`, interprets the argument as an axis and yields an
        # indexer object (or a ValueError) rather than the requested row.
        return data.observations.loc[identifier]

    def nb_columns(self, data):
        """Return the number of columns in ``data.observations``."""
        return len(data.observations.columns)

    def nb_rows(self, data):
        """Return the number of rows in ``data.observations``."""
        return len(data.observations)

    def get_numerical_attributes(self, data):
        """Return an array with the names of the numeric columns of ``data.observations``."""
        # NOTE(review): `_get_numeric_data` is a private pandas helper. It is kept
        # because the public `select_dtypes(include='number')` does not select
        # boolean columns the same way, which could change the result for callers.
        return data.observations._get_numeric_data().columns.values
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
@EngineTabularIterator.register_as_subclass('pd')
class PDTabularIterator(EngineTabularIterator):
    """Iterate over the rows and columns of pandas-backed data.

    The observations object is the same as the one you return from 'from_json_lines'.
    Every method reads the ``observations`` attribute of the ``data`` argument.
    """

    def columnnames(self, data):
        """Return the column labels of ``data.observations`` as a new list."""
        return list(data.observations.columns)

    def iterrows(self, data):
        """Return an iterator of ``(index, row)`` pairs over ``data.observations``."""
        # `DataFrame.iterrows()` already hands back an iterator.
        return data.observations.iterrows()

    def itercolumns(self, data):
        """Return a lazy iterator yielding each column of ``data.observations`` in order."""
        return (data.observations[label] for label in data.observations.columns)
|
39
|
|
|
|
|
40
|
|
|
|
|
41
|
|
|
@EngineTabularMutator.register_as_subclass('pd')
class PDTabularMutator(EngineTabularMutator):
    """Modify pandas-backed data in place."""

    def add_column(self, datapoints, values, new_attribute, **kwargs):
        """Assign ``values`` to ``datapoints.observations`` under the key ``new_attribute``.

        The assignment happens in place on ``datapoints.observations`` and nothing
        is returned. Extra keyword arguments are accepted but not used here.
        """
        table = datapoints.observations
        table[new_attribute] = values
|
45
|
|
|
|
|
46
|
|
|
@DataEngine.dec()
def observations(file_path):
    """Read the JSON-lines file at ``file_path`` with pandas and return the result.

    The file is parsed with ``lines=True``, i.e. one JSON object per line.
    NOTE(review): the function is wrapped by ``DataEngine.dec()``; how the
    decorator registers or exposes it is defined outside this module.
    """
    loaded = pd.read_json(file_path, lines=True)
    return loaded
|
49
|
|
|
|