1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- |
3
|
|
|
# vi: set ft=python sts=4 ts=4 sw=4 et: |
4
|
|
|
""" |
5
|
|
|
Utilities to fill crumbs with data from pandas DataFrames. |
6
|
|
|
#TODO: add tests |
7
|
|
|
""" |
8
|
|
|
from hansel.utils import _get_matching_items |
9
|
|
|
|
10
|
|
|
|
11
|
|
|
def _pandas_rename_cols(df, col_map):
    """ Build a renamed copy of `df`; `df` itself is left untouched.

    Parameters
    ----------
    df: pandas.DataFrame

    col_map: dict[str] -> str
        Maps a "DataFrame column name" to the "crumb argument name" that
        replaces it. Columns that are not keys of `col_map` keep their
        current name.
        Example: {'Subject ID': 'subject_id'}

    Returns
    -------
    renamed: pandas.DataFrame
        A copy of `df` carrying the new column labels.
    """
    result = df.copy()

    new_labels = []
    for label in df.columns:
        # fall back to the current label when `col_map` has no entry for it
        new_labels.append(col_map.get(label, label))

    result.columns = new_labels
    return result
29
|
|
|
|
30
|
|
|
|
31
|
|
|
def df_to_valuesmap(df, crumb_arg_names, arg_names=None):
    """ Turn the rows of `df` into a values_map, restricted to the columns
    selected from `df.columns`, `crumb_arg_names` and `arg_names`.

    The selection of names is delegated to
    `hansel.utils._get_matching_items(df.columns, crumb_arg_names, arg_names)`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb_arg_names: sequence of str
        The crumb argument names to be matched against the columns of `df`.

    arg_names: sequence of str
        A list of the crumb arguments and DataFrame columns to extract
        the info to fill the crumbs.
        Both must match, or use _pandas_rename_cols to rename the columns.
        If None, the choice is left to `_get_matching_items`
        (presumably every name present in both `df` and `crumb_arg_names`).
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuple
        One list of (column name, value) pairs for each row of `df`.
    """
    matched_names = _get_matching_items(df.columns, crumb_arg_names, arg_names)

    # keep only the matched columns and express every row as a dict
    selected = df[matched_names]
    records = selected.to_dict(orient='records')

    return (list(record.items()) for record in records)
60
|
|
|
|
61
|
|
|
|
62
|
|
|
def pandas_fill_crumbs(df, crumb, names_map=None):
    """ Create a generator of copies of `crumb`, one for each row of `df`,
    filled with the values of the columns selected through `names_map`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb: hansel.Crumb

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        This is a "DataFrame column name" to "crumb argument name" relation
        dictionary. Anything that is not a dict is converted with `dict()`.
        Example: {'Subject ID': 'subject_id'}
        If None will make a dictionary from the open crumbs arguments, e.g.,
        {'subject_id': 'subject_id'}.

        The columns of `df` are renamed following this map, and the values
        of the map are the argument names used to pick the columns of `df`
        and the crumb arguments in `crumb`.

    Returns
    -------
    crumbs: generator of crumbs
        Crumbs filled with the data in `df`, each one produced by
        `crumb.replace` when the generator is consumed.
    """
    if names_map is None:
        # identity mapping over the arguments of `crumb` that are still open
        column_to_arg = {name: name for name in crumb.open_args()}
    elif isinstance(names_map, dict):
        column_to_arg = names_map
    else:
        column_to_arg = dict(names_map)

    renamed_df = _pandas_rename_cols(df, column_to_arg)
    values_map = df_to_valuesmap(renamed_df,
                                 list(crumb.all_args()),
                                 arg_names=list(column_to_arg.values()))

    return (crumb.replace(**dict(row_values)) for row_values in values_map)
102
|
|
|
|