klib.utils._validate_input_int() - Code Metrics - Inspection of "Merge branch 'master' of github.com:akanz1/klib" - akanz1/klib - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( dd7fa7...5946a8 )

by Andreas

created 2020-04-22 14:43 UTC

klib.utils._validate_input_int() A

↳ Parent: klib.utils

Complexity

Conditions

Size

Total Lines	3
Code Lines	3

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	3
nop	2
dl	0
loc	3
rs	10
c	0
b	0
f	0

'''
Utilities and auxiliary functions.

:author: Andreas Kanz

'''

# Imports
import numpy as np
import pandas as pd


def _corr_selector(corr, split=None, threshold=0):
    '''
    Filter correlation values according to the requested split.

    Parameters
    ----------
    corr: Correlations to filter. Any split other than None calls \
``corr.where``, so the object has to provide that method (e.g. a Pandas \
Series or DataFrame).

    split: {None, 'pos', 'neg', 'high', 'low'}, default None
        Type of split to be performed.

        * None (or any unrecognised value): corr is returned untouched.
        * 'pos': keep values that are > 0 and >= threshold.
        * 'neg': keep values that are < 0 and <= threshold.
        * 'high': keep values whose absolute value is >= threshold.
        * 'low': keep values whose absolute value is <= threshold.

    threshold: float, default 0
        Cut-off used by the selected split.

    Returns
    -------
    corr: The input itself when no split applies, otherwise the result of \
``corr.where`` with the split condition (entries failing the condition are \
masked by ``where``).
    '''

    # Each branch only prepares the boolean mask and the user message; the
    # actual filtering and printing happen once below.
    if split == 'pos':
        keep = (corr >= threshold) & (corr > 0)
        message = 'Displaying positive correlations. Use "threshold" to further limit the results.'
    elif split == 'neg':
        keep = (corr <= threshold) & (corr < 0)
        message = 'Displaying negative correlations. Use "threshold" to further limit the results.'
    elif split == 'high':
        keep = np.abs(corr) >= threshold
        message = f'Displaying absolute correlations above the threshold ({threshold}).'
    elif split == 'low':
        keep = np.abs(corr) <= threshold
        message = f'Displaying absolute correlations below the threshold ({threshold}).'
    else:
        # No split requested: hand back the very same object.
        return corr

    filtered = corr.where(keep)
    print(message)

    return filtered


def _drop_duplicates(data):
    '''
    Remove duplicated rows and report which rows were removed.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    data: Pandas DataFrame without the duplicated rows. It is built from a \
copy, so the input is not modified.
    rows_dropped: Index Object holding the labels of the rows flagged by \
``DataFrame.duplicated()``.
    '''

    # Work on a private copy so the caller's object stays untouched.
    frame = pd.DataFrame(data).copy()

    # Boolean mask marking the rows pandas considers duplicates.
    is_duplicate = frame.duplicated()
    dropped_labels = frame[is_duplicate].index

    deduplicated = frame.drop_duplicates()

    return deduplicated, dropped_labels


def _memory_usage(data):
    '''
    Gives the total memory usage in kilobytes.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    memory_usage: float
        Sum of the per-column memory usage (index included, object columns \
inspected with ``deep=True``) divided by 1024 and rounded to two decimals.
    '''

    # memory_usage() only reads the data, so no defensive copy is made here:
    # duplicating a large dataset just to measure its size would be wasteful.
    frame = pd.DataFrame(data)
    total_bytes = frame.memory_usage(index=True, deep=True).sum()
    memory_usage = round(total_bytes/1024, 2)

    return memory_usage


def _missing_vals(data):
    '''
    Gives metrics of missing values in the dataset.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    dict with the following entries:

    mv_total: number of missing values in the entire dataset
    mv_rows: Pandas Series, number of missing values in each row
    mv_cols: Pandas Series, number of missing values in each column
    mv_rows_ratio: Pandas Series, mv_rows divided by the number of columns
    mv_cols_ratio: Pandas Series, mv_cols divided by the number of rows
    '''

    frame = pd.DataFrame(data)

    # Build the missing-value mask a single time and derive every metric from
    # it. isna() returns a new object, so the input is never modified and no
    # defensive copy is required.
    missing = frame.isna()
    n_rows, n_cols = frame.shape[0], frame.shape[1]

    mv_rows = missing.sum(axis=1)
    mv_cols = missing.sum(axis=0)
    # The grand total equals the sum of the per-column counts.
    mv_total = mv_cols.sum()
    mv_rows_ratio = mv_rows/n_cols
    mv_cols_ratio = mv_cols/n_rows

    return {'mv_total': mv_total,
            'mv_rows': mv_rows,
            'mv_cols': mv_cols,
            'mv_rows_ratio': mv_rows_ratio,
            'mv_cols_ratio': mv_cols_ratio}


def _validate_input_range(value, desc, lower, upper):
    '''
    Ensure that value lies within the closed interval [lower, upper].

    Parameters
    ----------
    value: Value to be checked.

    desc: str
        Name of the parameter, used in the error message.

    lower: Smallest accepted value (inclusive).

    upper: Largest accepted value (inclusive).

    Raises
    ------
    ValueError: If value is outside of the range or cannot be ordered \
against the bounds (e.g. NaN).
    '''

    # A negated chained comparison is used on purpose: every comparison with
    # NaN is False, so "value < lower or value > upper" would let NaN slip
    # through, whereas this form rejects it.
    if not (lower <= value <= upper):
        raise ValueError(
            f'Input value for {desc} is {value} but should be in the range {lower} <= {desc} <= {upper}.')


def _validate_input_int(value, desc):
    '''
    Ensure that value is an integer.

    Parameters
    ----------
    value: Value to be checked.

    desc: str
        Name of the parameter, used in the error message.

    Raises
    ------
    TypeError: If value is not an int (or an instance of an int subclass). \
Booleans are rejected although bool derives from int.
    '''

    # isinstance() accepts int subclasses, which an exact type comparison
    # would refuse. bool is excluded explicitly so True/False keep being
    # rejected as before.
    if isinstance(value, bool) or not isinstance(value, int):
        raise TypeError(f'Input value for {desc} is {type(value)} but should be an integer.')


def _validate_input_bool(value, desc):
    '''
    Ensure that value is a boolean.

    Parameters
    ----------
    value: Value to be checked.

    desc: str
        Name of the parameter, used in the error message.

    Raises
    ------
    ValueError: If value is not an instance of bool.
    '''

    # Guard clause: a genuine bool needs no further handling.
    if isinstance(value, bool):
        return

    raise ValueError(f'Input value for {desc} is {value} but should be boolean.')


1			'''
2			Utilities and auxiliary functions.
3
4			:author: Andreas Kanz
5
6			'''
7
8			# Imports
9			import numpy as np
10			import pandas as pd
11
12
13			def _corr_selector(corr, split=None, threshold=0):
14			'''
15			Select correlations based on the provided parameters.
16
17			Parameters
18			----------
19			corr: List or matrix of correlations.
20
21			split: {None, 'pos', 'neg', 'high', 'low'}, default None
22			Type of split to be performed.
23
24			threshold: float, default 0
25			Value between 0 <= threshold <= 1
26
27			Returns:
28			-------
29			corr: List or matrix of (filtered) correlations.
30			'''
31
32			if split == 'pos':
33			corr = corr.where((corr >= threshold) & (corr > 0))
34			print('Displaying positive correlations. Use "threshold" to further limit the results.')
35			elif split == 'neg':
36			corr = corr.where((corr <= threshold) & (corr < 0))
37			print('Displaying negative correlations. Use "threshold" to further limit the results.')
38			elif split == 'high':
39			corr = corr.where(np.abs(corr) >= threshold)
40			print(f'Displaying absolute correlations above the threshold ({threshold}).')
41			elif split == 'low':
42			corr = corr.where(np.abs(corr) <= threshold)
43			print(f'Displaying absolute correlations below the threshold ({threshold}).')
44			else:
45			corr = corr
46
47			return corr
48
49
50			def _drop_duplicates(data):
51			'''
52			Provides information and drops duplicate rows.
53
54			Parameters
55			----------
56			data: 2D dataset that can be coerced into Pandas DataFrame.
57
58			Returns
59			-------
60			data: Deduplicated Pandas DataFrame
61			rows_dropped: Index Object of rows dropped.
62			'''
63
64			data = pd.DataFrame(data).copy()
65			rows_dropped = data[data.duplicated()].index
66			data = data.drop_duplicates()
67
68			return data, rows_dropped
69
70
71			def _memory_usage(data):
72			'''
73			Gives the total memory usage in kilobytes.
74
75			Parameters
76			----------
77			data: 2D dataset that can be coerced into Pandas DataFrame.
78
79			Returns
80			-------
81			memory_usage: float
82			'''
83
84			data = pd.DataFrame(data).copy()
85			memory_usage = round(data.memory_usage(index=True, deep=True).sum()/1024, 2)
86
87			return memory_usage
88
89
90			def _missing_vals(data):
91			'''
92			Gives metrics of missing values in the dataset.
93
94			Parameters
95			----------
96			data: 2D dataset that can be coerced into Pandas DataFrame.
97
98			Returns
99			-------
100			mv_total: float, number of missing values in the entire dataset
101			mv_rows: float, number of missing values in each row
102			mv_cols: float, number of missing values in each column
103			mv_rows_ratio: float, ratio of missing values for each row
104			mv_cols_ratio: float, ratio of missing values for each column
105			'''
106
107			data = pd.DataFrame(data).copy()
108			mv_rows = data.isna().sum(axis=1)
109			mv_cols = data.isna().sum(axis=0)
110			mv_total = data.isna().sum().sum()
111			mv_rows_ratio = mv_rows/data.shape[1]
112			mv_cols_ratio = mv_cols/data.shape[0]
113
114			return {'mv_total': mv_total,
115			'mv_rows': mv_rows,
116			'mv_cols': mv_cols,
117			'mv_rows_ratio': mv_rows_ratio,
118			'mv_cols_ratio': mv_cols_ratio}
119
120
121			def _validate_input_range(value, desc, lower, upper):
122			if value < lower or value > upper:
123			raise ValueError(
124			f'Input value for {desc} is {value} but should be in the range {lower} <= {desc} <= {upper}.')
125
126
127			def _validate_input_int(value, desc):
128			if type(value) != int:
129			raise TypeError(f'Input value for {desc} is {type(value)} but should be an integer.')
130
131
132			def _validate_input_bool(value, desc):
133			if not(isinstance(value, bool)):
134			raise ValueError(f'Input value for {desc} is {value} but should be boolean.')
135

akanz1 / klib

GitHub Access Token became invalid

Push — master ( dd7fa7...5946a8 )

klib.utils._validate_input_int() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like