Completed
Push — master ( da0b24...226022 )
by Alexandre M.
01:04
created

pandas_fill_crumbs()   B

Complexity

Conditions 5

Size

Total Lines 40

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 5
dl 0
loc 40
rs 8.0894
1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Utilities to fill crumbs with data from pandas DataFrames.
6
#TODO: add tests
7
"""
8
from hansel.utils import _get_matching_items
9
10
11
def _pandas_rename_cols(df, col_map):
12
    """ Return a copy of `df` with the columns renamed as in `col_map`.
13
    Parameters
14
    ----------
15
    df: pandas.DataFrame
16
17
    col_map: dict[str] -> str
18
        This is a "DataFrame column name" to "crumb argument name" relation
19
        dictionary.
20
        Example: {'Subject ID': 'subject_id'}
21
22
    Returns
23
    -------
24
    renamed: pandas.DataFrame
25
    """
26
    renamed = df.copy()
27
    renamed.columns = [col_map.get(col_name, col_name) for col_name in df.columns]
28
    return renamed
29
30
31
def df_to_valuesmap(df, crumb_arg_names, arg_names=None):
    """ Return a values_map from the data in `df`, restricted to the columns
    selected by matching `df.columns`, `crumb_arg_names` and `arg_names`.

    The matching itself is delegated to `hansel.utils._get_matching_items`,
    which is called as `_get_matching_items(df.columns, crumb_arg_names,
    arg_names)`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb_arg_names: sequence of str
        The crumb argument names to be matched against the columns of `df`.

    arg_names: sequence of str
        A list of the crumb arguments and DataFrame columns to extract
        the info to fill the crumbs.
        Both must match, or use _pandas_rename_cols to rename the columns.
        If None, presumably all the names that match in both `df` and
        `crumb_arg_names` are used (decided by `_get_matching_items`;
        TODO confirm against that helper).
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuples
        One list per row of the selected columns of `df`, holding the
        (column name, value) pairs of that row.
    """
    crumb_names = _get_matching_items(df.columns,
                                      crumb_arg_names,
                                      arg_names)

    # NOTE(review): the container type returned by `_get_matching_items` is not
    # visible here. Coerce it to a list so the selection below is always a
    # multi-column one: pandas reads a tuple key as a single column label and
    # does not accept a set as an indexer.
    selected = df[list(crumb_names)]

    # one dict per row -> one list of (column name, value) pairs per row
    return (list(rec.items()) for rec in selected.to_dict(orient='records'))
60
61
62
def pandas_fill_crumbs(df, crumb, names_map=None):
    """ Return a generator of copies of `crumb`, one per row of `df`, each
    filled with the values of the `df` columns that match its arguments.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb: hansel.Crumb

    names_map: sequence of 2-tuples or dict[str] -> str
        A "DataFrame column name" to "crumb argument name" relation.
        Example: {'Subject ID': 'subject_id'}
        If None, every name in `crumb.open_args()` is mapped to itself, e.g.,
        {'subject_id': 'subject_id'}.
        Anything that is not a dict is converted with `dict(names_map)`.

        The columns of `df` are renamed with this map and its values are then
        passed as the `arg_names` filter to `df_to_valuesmap`, together
        with the names in `crumb.all_args()`.

    Returns
    -------
    crumbs: generator of crumbs
        The result of `crumb.replace(**row_values)` for each row of
        values extracted from `df`.
    """
    # settle on a plain dict: column name -> crumb argument name
    if names_map is None:
        column_to_arg = {name: name for name in crumb.open_args()}
    elif isinstance(names_map, dict):
        column_to_arg = names_map
    else:
        column_to_arg = dict(names_map)

    # make the column names of `df` agree with the crumb argument names
    renamed_df = df.pipe(_pandas_rename_cols, column_to_arg)

    # one list of (argument name, value) pairs per row
    row_pairs = renamed_df.pipe(df_to_valuesmap,
                                list(crumb.all_args()),
                                arg_names=list(column_to_arg.values()))

    return (crumb.replace(**dict(pairs)) for pairs in row_pairs)
102