klib.describe._missing_vals() - Code Metrics - Inspection of "update descriptions and functions" - akanz1/klib - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( c0ff44...3c81b0 )

by Andreas

created 2020-04-04 12:40 UTC

klib.describe._missing_vals() A

↳ Parent: klib.describe

Complexity

Conditions

Size

Total Lines	25
Code Lines	8

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	8
dl	0
loc	25
rs	10
c	0
b	0
f	0
cc	1
nop	1

'''
Utilities for descriptive analytics.

:author: Andreas Kanz

'''

# Imports
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns


# Missing value plot
def missingval_plot(data, cmap='PuBuGn', figsize=(20, 12), sort=False, spine_color='#EEEEEE'):
    '''
    Two-dimensional visualization of the missing values in a dataset.

    The figure consists of four panels: a bar plot with the share of missing values per column (annotated with the \
    absolute counts), a heatmap marking every missing cell, a text summary and a scatter plot with the number of \
    missing values per row.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column \
    information is used to label the plots.

    cmap: colormap, default 'PuBuGn'
        Any valid colormap can be used. E.g. 'Greys', 'RdPu'. More information can be found in the matplotlib \
        documentation.

    figsize: tuple, default (20,12)
        Use to control the figure size.

    sort: bool, default False
        Sort columns based on missing values in descending order and drop columns without any missing values

    spine_color: color-code, default '#EEEEEE'
        Set to 'None' to hide the spines on all plots or use any valid matplotlib color argument.

    Returns
    -------
    ax: matplotlib Axes or None
        Axes object with the heatmap. If the dataset contains no missing values a message is printed, nothing is \
        drawn and None is returned.
    '''

    data = pd.DataFrame(data)

    if sort:
        mv_cols_sorted = data.isna().sum(axis=0).sort_values(ascending=False)
        # keep only the columns that actually contain missing values, most affected first
        final_cols = mv_cols_sorted[mv_cols_sorted > 0].index.tolist()
        data = data[final_cols]
        print('Displaying only columns with missing values.')

    # Identify missing values
    mv_cols = data.isna().sum(axis=0)
    mv_rows = data.isna().sum(axis=1)
    mv_total = mv_cols.sum()
    mv_cols_rel = mv_cols / data.shape[0]
    total_datapoints = data.shape[0]*data.shape[1]

    # Guard clause: nothing to plot (this also covers empty datasets, so the divisions below are safe)
    if mv_total == 0:
        print('No missing values found in the dataset.')
        return None

    # Create figure and axes
    fig = plt.figure(figsize=figsize)
    grid = fig.add_gridspec(nrows=6, ncols=6, left=0.05, right=0.48, wspace=0.05)
    ax1 = fig.add_subplot(grid[:1, :5])
    ax2 = fig.add_subplot(grid[1:, :5])
    ax3 = fig.add_subplot(grid[:1, 5:])
    ax4 = fig.add_subplot(grid[1:, 5:])

    # ax1 - Barplot
    colors = plt.get_cmap(cmap)(mv_cols / np.max(mv_cols))  # color bars by height
    ax1.bar(range(len(mv_cols)), np.round((mv_cols_rel)*100, 2), color=colors)
    ax1.get_xaxis().set_visible(False)
    ax1.set(frame_on=False, xlim=(-.5, len(mv_cols)-0.5))
    ax1.set_ylim(0, np.max(mv_cols_rel)*100)
    ax1.grid(linestyle=':', linewidth=1)
    ax1.yaxis.set_major_formatter(ticker.PercentFormatter(decimals=0))
    ax1.tick_params(axis='y', colors='#111111', length=1)

    # annotate values on top of the bars
    for rect, label in zip(ax1.patches, mv_cols):
        height = rect.get_height()
        ax1.text(.1 + rect.get_x() + rect.get_width() / 2, height+0.5, label,
                 ha='center',
                 va='bottom',
                 rotation=90,  # must be numeric: the string '90' is rejected by current matplotlib
                 alpha=0.5,
                 fontsize='small')

    ax1.set_frame_on(True)
    for spine in ax1.spines.values():
        spine.set_visible(True)
        spine.set_color(spine_color)
    ax1.spines['top'].set_color(None)

    # ax2 - Heatmap
    sns.heatmap(data.isna(), cbar=False, cmap='binary', ax=ax2)
    # thin out the y-ticks: keep every fifth tick, rounded to the nearest ten, and label it with its position
    ax2.set_yticks(np.round(ax2.get_yticks()[0::5], -1))
    ax2.set_yticklabels(ax2.get_yticks())
    ax2.set_xticklabels(
        ax2.get_xticklabels(),
        horizontalalignment='center',
        fontweight='light',
        fontsize='medium')
    ax2.tick_params(length=1, colors='#111111')
    for spine in ax2.spines.values():
        spine.set_visible(True)
        spine.set_color(spine_color)

    # ax3 - Summary
    fontax3 = {'color':  '#111111',
               'weight': 'normal',
               'size': 12,
               }
    ax3.get_xaxis().set_visible(False)
    ax3.get_yaxis().set_visible(False)
    ax3.set(frame_on=False)

    ax3.text(0.1, 0.9, f"Total: {np.round(total_datapoints/1000,1)}K",
             transform=ax3.transAxes,
             fontdict=fontax3)
    ax3.text(0.1, 0.7, f"Missing: {np.round(mv_total/1000,1)}K",
             transform=ax3.transAxes,
             fontdict=fontax3)
    ax3.text(0.1, 0.5, f"Relative: {np.round(mv_total/total_datapoints*100,1)}%",
             transform=ax3.transAxes,
             fontdict=fontax3)
    ax3.text(0.1, 0.3, f"Max-col: {np.round(mv_cols.max()/data.shape[0]*100)}%",
             transform=ax3.transAxes,
             fontdict=fontax3)
    ax3.text(0.1, 0.1, f"Max-row: {np.round(mv_rows.max()/data.shape[1]*100)}%",
             transform=ax3.transAxes,
             fontdict=fontax3)

    # ax4 - Scatter plot
    ax4.get_yaxis().set_visible(False)
    for spine in ax4.spines.values():
        spine.set_color(spine_color)
    ax4.tick_params(axis='x', colors='#111111', length=1)

    # one marker per row; marker size and color both encode the number of missing values in that row
    ax4.scatter(mv_rows, range(len(mv_rows)), s=mv_rows, c=mv_rows, cmap=cmap, marker=".")
    ax4.set_ylim(0, len(mv_rows))
    ax4.set_ylim(ax4.get_ylim()[::-1])  # invert y-axis
    ax4.grid(linestyle=':', linewidth=1)

    return ax2


# Correlation matrix / heatmap
def corr_plot(data, split=None, threshold=0, cmap='BrBG', figsize=(12, 10), annot=True, dev=False, **kwargs):
    '''
    Two-dimensional visualization of the correlation between feature-columns, excluding NA values.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column \
    information is used to label the plots.

    split: {None, 'pos', 'neg', 'high', 'low'}, default None
        Type of split to be performed. Any other value is treated like None.

        * None: visualize all correlations between the feature-columns.
        * pos: visualize all positive correlations between the feature-columns at or above the threshold.
        * neg: visualize all negative correlations between the feature-columns at or below the threshold.
        * high: visualize all correlations between the feature-columns for which abs(corr) >= threshold is True.
        * low: visualize all correlations between the feature-columns for which abs(corr) <= threshold is True.

    threshold: float, default 0
        Value between 0 <= threshold <= 1

    cmap: matplotlib colormap name or object, or list of colors, default 'BrBG'
        The mapping from data values to color space.

    figsize: tuple, default (12, 10)
        Use to control the figure size.

    annot: bool, default True
        Use to show or hide annotations.

    dev: bool, default False
        Display figure settings in the plot by setting dev = True. If False, the settings are not displayed. Use for \
        presentations.

    **kwargs: optional
        Additional elements to control the visualization of the plot, e.g.:

        * mask: bool, default True
        If set to False the entire correlation matrix, including the upper triangle is shown. Set dev = False in this \
        case to avoid overlap.
        * vmax: float, default is calculated from the given correlation coefficients.
        Value between -1 or vmin <= vmax <= 1, limits the range of the colorbar.
        * vmin: float, default is calculated from the given correlation coefficients.
        Value between -1 <= vmin <= 1 or vmax, limits the range of the colorbar.
        * linewidths: float, default 0.5
        Controls the line-width inbetween the squares.
        * annot_kws: dict, default {'size' : 10}
        Controls the font size of the annotations. Only available when annot = True.
        * cbar_kws: dict, default {'shrink': .95, 'aspect': 30}
        Controls the size of the colorbar.
        * Many more kwargs are available, i.e. 'alpha' to control blending, or options to adjust labels, ticks ...

        Kwargs can be supplied through a dictionary of key-value pairs (see above). User-supplied values take \
        precedence over the defaults listed here.

    Returns
    -------
    ax: matplotlib Axes. Axes object created for the heatmap.
    '''

    data = pd.DataFrame(data)

    # Compute the correlation matrix once; every split mode below filters this same matrix
    corr = data.corr()

    if split == 'pos':
        corr = corr.where((corr >= threshold) & (corr > 0))
        print('Displaying positive correlations. Use "threshold" to further limit the results.')
    elif split == 'neg':
        corr = corr.where((corr <= threshold) & (corr < 0))
        print('Displaying negative correlations. Use "threshold" to further limit the results.')
    elif split == 'high':
        corr = corr.where(np.abs(corr) >= threshold)
        print('Displaying absolute correlations above a chosen threshold.')
    elif split == 'low':
        corr = corr.where(np.abs(corr) <= threshold)
        print('Displaying absolute correlations below a chosen threshold.')
    else:
        # no filtering; the strings are only used for the dev-mode settings text
        split = 'None'
        threshold = 'None'

    # Generate mask for the upper triangle (builtin bool: the np.bool alias was removed from NumPy)
    mask = np.triu(np.ones_like(corr, dtype=bool))

    # Compute correlation range from the visible (lower-triangle) cells to adjust the colorbar limits
    visible = corr.where(~mask)
    vmax = np.round(np.nanmax(visible)-0.05, 2)
    vmin = np.round(np.nanmin(visible)+0.05, 2)

    # Set up the matplotlib figure and generate colormap
    fig, ax = plt.subplots(figsize=figsize)

    # kwargs for the heatmap; user-supplied kwargs come last so they override the defaults
    kwargs = {'mask': mask,
              'cmap': cmap,
              'annot': annot,
              'vmax': vmax,
              'vmin': vmin,
              'linewidths': .5,
              'annot_kws': {'size': 10},
              'cbar_kws': {'shrink': .95, 'aspect': 30},
              **kwargs}

    # Draw heatmap with mask and some default settings
    sns.heatmap(corr,
                center=0,
                square=True,
                fmt='.2f',
                **kwargs
                )

    ax.set_title('Feature-correlation Matrix', fontdict={'fontsize': 18})

    if dev:  # show settings
        fig.suptitle(f"\
            Settings (dev-mode): \n\
            - split-mode: {split} \n\
            - threshold: {threshold} \n\
            - annotations: {annot} \n\
            - cbar: \n\
                - vmax: {vmax} \n\
                - vmin: {vmin} \n\
            - linewidths: {kwargs['linewidths']} \n\
            - annot_kws: {kwargs['annot_kws']} \n\
            - cbar_kws: {kwargs['cbar_kws']}",
                     fontsize=12,
                     color='gray',
                     x=0.35,
                     y=0.85,
                     ha='left')

    return ax


# _functions

def _memory_usage(data):
    '''
    Total memory footprint of a dataset in kilobytes, rounded to two decimals.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    memory_usage: float
        Sum of the memory used by the index and all columns (object contents included via deep=True), divided by 1024.

    '''

    frame = pd.DataFrame(data)

    # per-column (and index) byte counts, with object columns measured by their actual contents
    bytes_per_part = frame.memory_usage(index=True, deep=True)
    total_bytes = bytes_per_part.sum()
    kilobytes = total_bytes/1024

    return round(kilobytes, 2)


def _missing_vals(data):
    '''
    Gives metrics of missing values in the dataset.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    total_mv: int, number of missing values in the entire dataset
    rows_mv: Pandas Series, number of missing values in each row
    cols_mv: Pandas Series, number of missing values in each column
    rows_mv_ratio: Pandas Series, ratio of missing values for each row (relative to the number of columns)
    cols_mv_ratio: Pandas Series, ratio of missing values for each column (relative to the number of rows)
    '''

    data = pd.DataFrame(data)
    is_missing = data.isna()

    # axis=1 collapses the columns and yields one count per row; axis=0 yields one count per column
    rows_mv = is_missing.sum(axis=1)
    cols_mv = is_missing.sum(axis=0)
    total_mv = cols_mv.sum()

    # a row holds data.shape[1] cells, a column holds data.shape[0] cells
    rows_mv_ratio = rows_mv/data.shape[1]
    cols_mv_ratio = cols_mv/data.shape[0]

    return total_mv, rows_mv, cols_mv, rows_mv_ratio, cols_mv_ratio


1			'''
2			Utilities for descriptive analytics.
3
4			:author: Andreas Kanz
5
6			'''
7
8			# Imports
9			import matplotlib.pyplot as plt
10			import matplotlib.ticker as ticker
11			import numpy as np
12			import pandas as pd
13			import seaborn as sns
14
15
16			# Missing value plot
17			def missingval_plot(data, cmap='PuBuGn', figsize=(20, 12), sort=False, spine_color='#EEEEEE'):
18			'''
19			Two-dimensional visualization of the missing values in a dataset.
20
21			Parameters
22			----------
23			data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column information is used to label the plots.
24
25			cmap: colormap, default 'PuBuGn'
26			Any valid colormap can be used. E.g. 'Greys', 'RdPu'. More information can be found in the matplotlib documentation.
27
28			figsize: tuple, default (20,12)
29			Use to control the figure size.
30
31			sort: bool, default False
32			Sort columns based on missing values in descending order and drop columns without any missing values
33
34			spine_color: color-code, default '#EEEEEE'
35			Set to 'None' to hide the spines on all plots or use any valid matplotlib color argument.
36
37			Returns
38			-------
39			ax: matplotlib Axes. Axes object with the heatmap.
40			'''
41
42			data = pd.DataFrame(data)
43
44			if sort:
45			mv_cols_sorted = data.isna().sum(axis=0).sort_values(ascending=False)
46			final_cols = mv_cols_sorted.drop(mv_cols_sorted[mv_cols_sorted.values == 0].keys().tolist()).keys().tolist()
47			data = data[final_cols]
48			print('Displaying only columns with missing values.')
49
50			# Identify missing values
51			mv_cols = data.isna().sum(axis=0)
52			mv_rows = data.isna().sum(axis=1)
53			mv_total = mv_cols.sum()
54			mv_cols_rel = mv_cols / data.shape[0]
55			total_datapoints = data.shape[0]*data.shape[1]
56
57			if mv_total == 0:
58			print('No missing values found in the dataset.')
59			else:
60			# Create figure and axes
61			fig = plt.figure(figsize=figsize)
62			grid = fig.add_gridspec(nrows=6, ncols=6, left=0.05, right=0.48, wspace=0.05)
63			ax1 = fig.add_subplot(grid[:1, :5])
64			ax2 = fig.add_subplot(grid[1:, :5])
65			ax3 = fig.add_subplot(grid[:1, 5:])
66			ax4 = fig.add_subplot(grid[1:, 5:])
67
68			# ax1 - Barplot
69			colors = plt.get_cmap(cmap)(mv_cols / np.max(mv_cols)) # color bars by height
70			ax1.bar(range(len(mv_cols)), np.round((mv_cols_rel)*100, 2), color=colors)
71			ax1.get_xaxis().set_visible(False)
72			ax1.set(frame_on=False, xlim=(-.5, len(mv_cols)-0.5))
73			ax1.set_ylim(0, np.max(mv_cols_rel)*100)
74			ax1.grid(linestyle=':', linewidth=1)
75			ax1.yaxis.set_major_formatter(ticker.PercentFormatter(decimals=0))
76			ax1.tick_params(axis='y', colors='#111111', length=1)
77
78			# annotate values on top of the bars
79			for rect, label in zip(ax1.patches, mv_cols):
80			height = rect.get_height()
81			ax1.text(.1 + rect.get_x() + rect.get_width() / 2, height+0.5, label,
82			ha='center',
83			va='bottom',
84			rotation='90',
85			alpha=0.5,
86			fontsize='small')
87
88			ax1.set_frame_on(True)
89			for _, spine in ax1.spines.items():
90			spine.set_visible(True)
91			spine.set_color(spine_color)
92			ax1.spines['top'].set_color(None)
93
94			# ax2 - Heatmap
95			sns.heatmap(data.isna(), cbar=False, cmap='binary', ax=ax2)
96			ax2.set_yticks(np.round(ax2.get_yticks()[0::5], -1))
97			ax2.set_yticklabels(ax2.get_yticks())
98			ax2.set_xticklabels(
99			ax2.get_xticklabels(),
100			horizontalalignment='center',
101			fontweight='light',
102			fontsize='medium')
103			ax2.tick_params(length=1, colors='#111111')
104			for _, spine in ax2.spines.items():
105			spine.set_visible(True)
106			spine.set_color(spine_color)
107
108			# ax3 - Summary
109			fontax3 = {'color': '#111111',
110			'weight': 'normal',
111			'size': 12,
112			}
113			ax3.get_xaxis().set_visible(False)
114			ax3.get_yaxis().set_visible(False)
115			ax3.set(frame_on=False)
116
117			ax3.text(0.1, 0.9, f"Total: {np.round(total_datapoints/1000,1)}K",
118			transform=ax3.transAxes,
119			fontdict=fontax3)
120			ax3.text(0.1, 0.7, f"Missing: {np.round(mv_total/1000,1)}K",
121			transform=ax3.transAxes,
122			fontdict=fontax3)
123			ax3.text(0.1, 0.5, f"Relative: {np.round(mv_total/total_datapoints*100,1)}%",
124			transform=ax3.transAxes,
125			fontdict=fontax3)
126			ax3.text(0.1, 0.3, f"Max-col: {np.round(mv_cols.max()/data.shape[0]*100)}%",
127			transform=ax3.transAxes,
128			fontdict=fontax3)
129			ax3.text(0.1, 0.1, f"Max-row: {np.round(mv_rows.max()/data.shape[1]*100)}%",
130			transform=ax3.transAxes,
131			fontdict=fontax3)
132
133			# ax4 - Scatter plot
134			ax4.get_yaxis().set_visible(False)
135			for _, spine in ax4.spines.items():
136			spine.set_color(spine_color)
137			ax4.tick_params(axis='x', colors='#111111', length=1)
138
139			ax4.scatter(mv_rows, range(len(mv_rows)), s=mv_rows, c=mv_rows, cmap=cmap, marker=".")
140			ax4.set_ylim(0, len(mv_rows))
141			ax4.set_ylim(ax4.get_ylim()[::-1]) # invert y-axis
142			ax4.grid(linestyle=':', linewidth=1)
143
144
145			# Correlation matrix / heatmap
146			def corr_plot(data, split=None, threshold=0, cmap='BrBG', figsize=(12, 10), annot=True, dev=False, **kwargs):
147			'''
148			Two-dimensional visualization of the correlation between feature-columns, excluding NA values.
149
150			Parameters
151			----------
152			data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column information is used to label the plots.
153
154			split: {None, 'pos', 'neg', 'high', 'low'}, default None
155			Type of split to be performed.
156
157			* None: visualize all correlations between the feature-columns.
158			* pos: visualize all positive correlations between the feature-columns above the threshold.
159			* neg: visualize all negative correlations between the feature-columns below the threshold.
160			* high: visualize all correlations between the feature-columns for which abs(corr) > threshold is True.
161			* low: visualize all correlations between the feature-columns for which abs(corr) < threshold is True.
162
163			threshold: float, default 0
164			Value between 0 <= threshold <= 1
165
166			cmap: matplotlib colormap name or object, or list of colors, default 'BrBG'
167			The mapping from data values to color space.
168
169			figsize: tuple, default (12, 10)
170			Use to control the figure size.
171
172			annot: bool, default True
173			Use to show or hide annotations.
174
175			dev: bool, default False
176			Display figure settings in the plot by setting dev = True. If False, the settings are not displayed. Use for presentations.
177
178			**kwargs: optional
179			Additional elements to control the visualization of the plot, e.g.:
180
181			* mask: bool, default True
182			If set to False the entire correlation matrix, including the upper triangle is shown. Set dev = False in this case to avoid overlap.
183			* vmax: float, default is calculated from the given correlation coefficients.
184			Value between -1 or vmin <= vmax <= 1, limits the range of the colorbar.
185			* vmin: float, default is calculated from the given correlation coefficients.
186			Value between -1 <= vmin <= 1 or vmax, limits the range of the colorbar.
187			* linewidths: float, default 0.5
188			Controls the line-width inbetween the squares.
189			* annot_kws: dict, default {'size' : 10}
190			Controls the font size of the annotations. Only available when annot = True.
191			* cbar_kws: dict, default {'shrink': .95, 'aspect': 30}
192			Controls the size of the colorbar.
193			* Many more kwargs are available, i.e. 'alpha' to control blending, or options to adjust labels, ticks ...
194
195			Kwargs can be supplied through a dictionary of key-value pairs (see above).
196
197			Returns
198			-------
199			ax: matplotlib Axes. Axes object with the heatmap.
200			'''
201
202			data = pd.DataFrame(data)
203
204			if split == 'pos':
205			corr = data.corr().where((data.corr() >= threshold) & (data.corr() > 0))
206			print('Displaying positive correlations. Use "threshold" to further limit the results.')
207			elif split == 'neg':
208			corr = data.corr().where((data.corr() <= threshold) & (data.corr() < 0))
209			print('Displaying negative correlations. Use "threshold" to further limit the results.')
210			elif split == 'high':
211			corr = data.corr().where(np.abs(data.corr()) >= threshold)
212			print('Displaying absolute correlations above a chosen threshold.')
213			elif split == 'low':
214			corr = data.corr().where(np.abs(data.corr()) <= threshold)
215			print('Displaying absolute correlations below a chosen threshold.')
216			else:
217			corr = data.corr()
218			split = 'None'
219			threshold = 'None'
220
221			# Generate mask for the upper triangle
222			mask = np.triu(np.ones_like(corr, dtype=np.bool))
223
224			# Compute dimensions and correlation range to adjust settings
225			vmax = np.round(np.nanmax(corr.where(mask == False))-0.05, 2)
226			vmin = np.round(np.nanmin(corr.where(mask == False))+0.05, 2)
227
228			# Set up the matplotlib figure and generate colormap
229			fig, ax = plt.subplots(figsize=figsize)
230
231			# kwargs for the heatmap
232			kwargs = {'mask': mask,
233			'cmap': cmap,
234			'annot': annot,
235			'vmax': vmax,
236			'vmin': vmin,
237			'linewidths': .5,
238			'annot_kws': {'size': 10},
239			'cbar_kws': {'shrink': .95, 'aspect': 30},
240			**kwargs}
241
242			# Draw heatmap with mask and some default settings
243			sns.heatmap(corr,
244			center=0,
245			square=True,
246			fmt='.2f',
247			**kwargs
248			)
249
250			ax.set_title('Feature-correlation Matrix', fontdict={'fontsize': 18})
251
252			if dev: # show settings
253			fig.suptitle(f"\
254			Settings (dev-mode): \n\
255			- split-mode: {split} \n\
256			- threshold: {threshold} \n\
257			- annotations: {annot} \n\
258			- cbar: \n\
259			- vmax: {vmax} \n\
260			- vmin: {vmin} \n\
261			- linewidths: {kwargs['linewidths']} \n\
262			- annot_kws: {kwargs['annot_kws']} \n\
263			- cbar_kws: {kwargs['cbar_kws']}",
264			fontsize=12,
265			color='gray',
266			x=0.35,
267			y=0.85,
268			ha='left')
269
270
271			# _functions
272
273			def _memory_usage(data):
274			'''
275			Gives the total memory usage in kilobytes.
276
277			Parameters
278			----------
279			data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column information is used to label the plots.
280
281			Returns
282			-------
283			memory_usage: float
284
285			'''
286
287			data = pd.DataFrame(data)
288			memory_usage = round(data.memory_usage(index=True, deep=True).sum()/1024, 2)
289
290			return memory_usage
291
292
293			def _missing_vals(data):
294			'''
295			Gives metrics of missing values in the dataset.
296
297			Parameters
298			----------
299			data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column information is used to label the plots.
300
301			Returns
302			-------
303			total_mv: float, number of missing values in the entire dataset
304			rows_mv: float, number of missing values in each row
305			cols_mv: float, number of missing values in each column
306			rows_mv_ratio: float, ratio of missing values for each row
307			cols_mv_ratio: float, ratio of missing values for each column
308			'''
309
310			data = pd.DataFrame(data)
311			rows_mv = data.isna().sum(axis=0)
312			cols_mv = data.isna().sum(axis=1)
313			total_mv = data.isna().sum().sum()
314			rows_mv_ratio = rows_mv/data.shape[0]
315			cols_mv_ratio = cols_mv/data.shape[1]
316
317			return total_mv, rows_mv, cols_mv, rows_mv_ratio, cols_mv_ratio
318

akanz1 / klib

GitHub Access Token became invalid

Push — master ( c0ff44...3c81b0 )

klib.describe._missing_vals() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like