pandas_fill_crumbs() - Code Metrics - Inspection of "Update README.rst" - alexsavio/hansel - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( da0b24...226022 )

by Alexandre M.

created 2016-04-14 14:53 UTC

pandas_fill_crumbs() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	5
dl	0
loc	40
rs	8.0894

# -*- coding: utf-8 -*-
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
Utilities to fill crumbs with data from pandas DataFrames.
#TODO: add tests
"""
from hansel.utils import _get_matching_items


def _pandas_rename_cols(df, col_map):
    """ Return a copy of `df` with the columns renamed as in `col_map`.
    Parameters
    ----------
    df: pandas.DataFrame

    col_map: dict[str] -> str
        This is a "DataFrame column name" to "crumb argument name" relation
        dictionary.
        Example: {'Subject ID': 'subject_id'}

    Returns
    -------
    renamed: pandas.DataFrame
    """
    renamed = df.copy()
    renamed.columns = [col_map.get(col_name, col_name) for col_name in df.columns]
    return renamed


def df_to_valuesmap(df, crumb_arg_names, arg_names=None):
    """ Turn the rows of `df` into a values_map restricted to the matched columns.

    The columns to keep are whatever `_get_matching_items` returns for
    `df.columns`, `crumb_arg_names` and `arg_names`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb_arg_names: sequence of str
        Names of the crumb arguments, passed to `_get_matching_items`
        together with the column names of `df`.

    arg_names: sequence of str
        Forwarded to `_get_matching_items` as its third argument.
        Presumably limits the match to these names, and None means "every
        name shared by `df` and `crumb_arg_names`" -- confirm against
        `hansel.utils._get_matching_items`.
        The column names must already agree with the crumb argument names;
        use _pandas_rename_cols beforehand if they do not.
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuple
        One list per row of `df`, holding the (column name, value) pairs
        of the matched columns.
    """
    matched_cols = _get_matching_items(df.columns, crumb_arg_names, arg_names)

    # keep only the matched columns and dump each row as a {column: value} dict
    row_dicts = df[matched_cols].to_dict(orient='records')

    return (list(row.items()) for row in row_dicts)


def pandas_fill_crumbs(df, crumb, names_map=None):
    """ Make a generator of copies of `crumb` filled with the values in `df`.

    The columns of `df` are first renamed following `names_map`, then each
    row is converted to (argument name, value) pairs by `df_to_valuesmap`
    and handed to `crumb.replace`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb: hansel.Crumb
        Must provide `open_args()`, `all_args()` and `replace(**kwargs)`.

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        Relation of "DataFrame column name" to "crumb argument name".
        Example: {'Subject ID': 'subject_id'}
        Anything that is not a dict is converted with `dict(names_map)`.
        If None, an identity mapping is built from `crumb.open_args()`, e.g.,
        {'subject_id': 'subject_id'}.

        The values of this mapping are passed to `df_to_valuesmap` as
        `arg_names` to select the columns and crumb arguments to use.

    Returns
    -------
    crumbs: generator of crumbs
        One result of `crumb.replace` per values_map entry produced from `df`.
    """
    if names_map is None:
        col_to_arg = {name: name for name in crumb.open_args()}
    elif isinstance(names_map, dict):
        col_to_arg = names_map
    else:
        col_to_arg = dict(names_map)

    # rename first so that the column names line up with the crumb arguments
    renamed_df = _pandas_rename_cols(df, col_to_arg)
    values_map = df_to_valuesmap(renamed_df,
                                 list(crumb.all_args()),
                                 arg_names=list(col_to_arg.values()))

    return (crumb.replace(**dict(pairs)) for pairs in values_map)


1			# -- coding: utf-8 --
2			# emacs: -- mode: python; py-indent-offset: 4; indent-tabs-mode: nil --
3			# vi: set ft=python sts=4 ts=4 sw=4 et:
4			"""
5			Utilities to fill crumbs with data from pandas DataFrames.
6			#TODO: add tests
7			"""
8			from hansel.utils import _get_matching_items
9
10
11			def _pandas_rename_cols(df, col_map):
12			""" Return a copy of `df` with the columns renamed as in `col_map`.
13			Parameters
14			----------
15			df: pandas.DataFrame
16
17			col_map: dict[str] -> str
18			This is a "DataFrame column name" to "crumb argument name" relation
19			dictionary.
20			Example: {'Subject ID': 'subject_id'}
21
22			Returns
23			-------
24			renamed: pandas.DataFrame
25			"""
26			renamed = df.copy()
27			renamed.columns = [col_map.get(col_name, col_name) for col_name in df.columns]
28			return renamed
29
30
31			def df_to_valuesmap(df, crumb_arg_names, arg_names=None):
32			""" Return a values_map from data in `df` and
33			the matching column and arguments names from `df`, `crumb_arg_names`
34			and `arg_names`.
35			Parameters
36			----------
37			df: pandas.DataFrame
38
39			crumb: hansel.Crumb
40
41			arg_names: sequence of str
42			A list of the crumb arguments and DataFrame columns to extract
43			the info to fill the crumbs.
44			Both must match, or use _pandas_rename_cols to rename the columns.
45			If None, will look for all the arguments that match in both
46			`df` and `arg_names`.
47			Example: ['subject_id']
48
49
50			Returns
51			-------
52			values_map: list of sequences of 2-tuple
53			"""
54			crumb_names = _get_matching_items(df.columns,
55			crumb_arg_names,
56			arg_names)
57
58			# get the columns of df that have been matched
59			return (list(rec.items()) for rec in df[crumb_names].to_dict(orient='records'))
60
61
62			def pandas_fill_crumbs(df, crumb, names_map=None):
63			""" Create a generator of crumbs filled with the `df` column names and `crumb`
64			arguments that match or the ones indicated in `names_map`.
65			Parameters
66			----------
67			df: pandas.DataFrame
68
69			crumb: hansel.Crumb
70
71			names_map: sequence of sequences of 2-tuple or dict[str] -> str
72			This is a "DataFrame column name" to "crumb argument name" relation
73			dictionary.
74			Example: {'Subject ID': 'subject_id'}
75			If None will make a dictionary from the open crumbs arguments, e.g.,
76			{'subject_id': 'subject_id'}.
77
78			The values of this dict will be used to filter the columns
79			in `df` and the crumb arguments in `crumb`.
80
81			You may need to rename the columns of `df` before using this.
82
83			Returns
84			-------
85			crumbs: generator of crumbs
86			Crumbs filled with the data in `df`.
87			"""
88			if names_map is None:
89			names_map = {arg_name: arg_name for arg_name in crumb.open_args()}
90
91			nmap = names_map
92			if not isinstance(nmap, dict):
93			nmap = dict(nmap)
94
95			values_map = (df
96			.pipe(_pandas_rename_cols, nmap)
97			.pipe(df_to_valuesmap, list(crumb.all_args()),
98			arg_names=list(nmap.values()))
99			)
100
101			return (crumb.replace(**dict(argvals)) for argvals in values_map)
102

alexsavio / hansel

Push — master ( da0b24...226022 )

pandas_fill_crumbs() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like