Completed
Push — master ( d09250...b67a9c )
by Alexandre M.
9s
created

pandas_fill_crumbs()   B

Complexity

Conditions 5

Size

Total Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 1 Features 2
Metric Value
cc 5
c 3
b 1
f 2
dl 0
loc 39
rs 8.0894
1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Utilities to fill crumbs with data from pandas DataFrames.
6
#TODO: add tests
7
"""
8
from typing import Iterator, Dict
9
10
import hansel
11
from hansel.utils import _get_matching_items, CrumbArgsSequences
12
13
14
def _pandas_rename_cols(df: 'pandas.DataFrame', col_map: Dict[str, str]) -> 'pandas.DataFrame':
    """ Build a copy of `df` whose column labels are translated through `col_map`.

    Parameters
    ----------
    df: pandas.DataFrame
        The source table. It is left untouched; a copy is returned.

    col_map: dict[str] -> str
        A "DataFrame column name" to "crumb argument name" lookup table.
        Columns that do not appear as keys keep their current name.
        Example: {'Subject ID': 'subject_id'}

    Returns
    -------
    renamed: pandas.DataFrame
        A copy of `df` carrying the translated column labels.
    """
    new_labels = []
    for label in df.columns:
        # fall back to the current label when the map has no entry for it
        new_labels.append(col_map.get(label, label))

    result = df.copy()
    result.columns = new_labels
    return result
32
33
34
def df_to_valuesmap(
    df: 'pandas.DataFrame',
    crumb_arg_names: Iterator[str],
    arg_names: Iterator[str]=None
) -> CrumbArgsSequences:
    """ Turn the rows of `df` into a values_map, one entry per row.

    The columns to use are chosen by `_get_matching_items`, which is given
    the columns of `df`, `crumb_arg_names` and `arg_names`.

    Parameters
    ----------
    df: pandas.DataFrame
        The table holding the values to put in the crumbs.

    crumb_arg_names: sequence of str
        The argument names of the crumb that will be filled.

    arg_names: sequence of str
        A list of the crumb arguments and DataFrame columns to extract
        the info to fill the crumbs.
        Both must match, or use _pandas_rename_cols to rename the columns.
        If None, the choice is left entirely to `_get_matching_items`
        (presumably every name found in both `df` and `crumb_arg_names`).
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuple
        For each row of `df`, a list of (column name, value) pairs
        restricted to the matched columns.
    """
    matched_columns = _get_matching_items(df.columns, crumb_arg_names, arg_names)

    # keep only the matched columns and export every row as a dict
    row_dicts = df[matched_columns].to_dict(orient='records')

    return (list(row.items()) for row in row_dicts)
67
68
69
def pandas_fill_crumbs(
    df: 'pandas.DataFrame',
    crumb: hansel.Crumb,
    names_map: CrumbArgsSequences=None
) -> Iterator[hansel.Crumb]:
    """ Generate one filled copy of `crumb` for each row of `df`.

    The columns of `df` are first renamed following `names_map`, then the
    columns selected through the values of `names_map` are used as the
    replacement values for the crumb arguments.

    Parameters
    ----------
    df: pandas.DataFrame
        The table with the values for the crumb arguments.

    crumb: hansel.Crumb
        The crumb used as template for every generated crumb.

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        A "DataFrame column name" to "crumb argument name" relation.
        Anything that is not a dict is converted with `dict()`.
        Example: {'Subject ID': 'subject_id'}
        If None, an identity map is built from the open arguments of
        `crumb`, e.g., {'subject_id': 'subject_id'}.

        The values of this map are passed on as the argument names
        used to pick the columns in `df`.

    Returns
    -------
    crumbs: generator of crumbs
        The result of `crumb.replace` for the values of each row in `df`.
    """
    if names_map is None:
        names_map = {name: name for name in crumb.open_args()}

    column_to_arg = names_map if isinstance(names_map, dict) else dict(names_map)

    # step 1: give the columns the names of the crumb arguments
    renamed = df.pipe(_pandas_rename_cols, column_to_arg)

    # step 2: extract, row by row, the (argument, value) pairs to fill in
    records = renamed.pipe(
        df_to_valuesmap,
        list(crumb.all_args()),
        arg_names=list(column_to_arg.values()),
    )

    for arg_values in records:
        yield crumb.replace(**dict(arg_values))
112