1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- |
3
|
|
|
# vi: set ft=python sts=4 ts=4 sw=4 et: |
4
|
|
|
""" |
5
|
|
|
Utilities to fill crumbs with data from pandas DataFrames. |
6
|
|
|
#TODO: add tests |
7
|
|
|
""" |
8
|
|
|
from typing import Iterator, Dict |
9
|
|
|
|
10
|
|
|
import hansel |
11
|
|
|
from hansel.utils import _get_matching_items, CrumbArgsSequences |
12
|
|
|
|
13
|
|
|
|
14
|
|
|
def _pandas_rename_cols(df: 'pandas.DataFrame', col_map: Dict[str, str]) -> 'pandas.DataFrame':
    """ Return a copy of `df` with the columns renamed as in `col_map`.

    The input DataFrame is left untouched; column names that do not
    appear as keys in `col_map` keep their original name.

    Parameters
    ----------
    df: pandas.DataFrame
        The DataFrame whose columns will be renamed in the returned copy.

    col_map: dict[str] -> str
        This is a "DataFrame column name" to "crumb argument name" relation
        dictionary.
        Example: {'Subject ID': 'subject_id'}

    Returns
    -------
    renamed: pandas.DataFrame
        A copy of `df` with the new column names.
    """
    # Work on a copy so the caller's DataFrame keeps its column names.
    renamed = df.copy()
    # Fall back to the current name when `col_map` has no entry for it.
    renamed.columns = [col_map.get(col_name, col_name) for col_name in df.columns]
    return renamed
32
|
|
|
|
33
|
|
|
|
34
|
|
|
def df_to_valuesmap(
    df: 'pandas.DataFrame',
    crumb_arg_names: Iterator[str],
    arg_names: Iterator[str]=None
) -> CrumbArgsSequences:
    """ Return a values_map from data in `df` and
    the matching column and arguments names from `df`, `crumb_arg_names`
    and `arg_names`.

    The columns to extract are chosen by
    `_get_matching_items(df.columns, crumb_arg_names, arg_names)`.

    Parameters
    ----------
    df: pandas.DataFrame
        The data source; one entry of the result is produced per row.

    crumb_arg_names: sequence of str
        The crumb argument names to be matched against the column
        names of `df`.

    arg_names: sequence of str
        A list of the crumb arguments and DataFrame columns to extract
        the info to fill the crumbs.
        Both must match, or use _pandas_rename_cols to rename the columns.
        If None, will look for all the arguments that match in both
        `df` and `crumb_arg_names`.
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuple
        For each row of `df`, a list of (column name, value) pairs
        restricted to the matched columns.
    """
    # Materialise the matches: pandas column selection expects a list-like,
    # and the helper's exact return type is not visible from this module.
    crumb_names = list(_get_matching_items(df.columns,
                                           crumb_arg_names,
                                           arg_names))

    # get the columns of df that have been matched, one dict per row
    records = df[crumb_names].to_dict(orient='records')
    return (list(rec.items()) for rec in records)
67
|
|
|
|
68
|
|
|
|
69
|
|
|
def pandas_fill_crumbs(
    df: 'pandas.DataFrame',
    crumb: hansel.Crumb,
    names_map: CrumbArgsSequences=None
) -> Iterator[hansel.Crumb]:
    """ Create a generator of crumbs filled with the `df` column names and `crumb`
    arguments that match or the ones indicated in `names_map`.

    This is a generator function: nothing is read from `df` until the
    result is iterated.

    Parameters
    ----------
    df: pandas.DataFrame
        The data used to fill the crumb; one crumb is produced per row.

    crumb: hansel.Crumb
        The crumb to be filled. It is not modified here; each result is
        whatever `crumb.replace` returns.

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        This is a "DataFrame column name" to "crumb argument name" relation
        dictionary.
        Example: {'Subject ID': 'subject_id'}
        If None will make a dictionary from the open crumbs arguments, e.g.,
        {'subject_id': 'subject_id'}.

        The values of this dict will be used to filter the columns
        in `df` and the crumb arguments in `crumb`.

        You may need to rename the columns of `df` before using this.

    Returns
    -------
    crumbs: generator of crumbs
        Crumbs filled with the data in `df`.
    """
    if names_map is None:
        # Default: every open argument maps to a column of the same name.
        names_map = {arg_name: arg_name for arg_name in crumb.open_args()}

    # Accept a sequence of (column, argument) pairs as well as a dict.
    nmap = names_map if isinstance(names_map, dict) else dict(names_map)

    # Rename the columns to crumb argument names, then extract per-row values
    # for the arguments listed in the map.
    renamed = _pandas_rename_cols(df, nmap)
    values_map = df_to_valuesmap(
        renamed,
        list(crumb.all_args()),
        arg_names=list(nmap.values()),
    )

    for argvals in values_map:
        yield crumb.replace(**dict(argvals))
112
|
|
|
|