GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( 4f98db...92a4ba )
by Andreas
01:40
created

klib.utils._corr_selector()   A

Complexity

Conditions 5

Size

Total Lines 32
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 15
nop 3
dl 0
loc 32
rs 9.1832
c 0
b 0
f 0
1
'''
2
Utilities and auxiliary functions.
3
4
:author: Andreas Kanz
5
6
'''
7
8
# Imports
9
import numpy as np
10
import pandas as pd
11
12
13
def _corr_selector(corr, split=None, threshold=0):
14
    '''
15
    Parameters
16
    ----------
17
    corr: List or matrix of correlations.
18
19
    split: {None, 'pos', 'neg', 'high', 'low'}, default None
20
        Type of split to be performed.
21
22
    threshold: float, default 0
23
        Value between 0 <= threshold <= 1
24
25
    Returns:
26
    -------
27
    corr: List or matrix of (filtered) correlations.
28
    '''
29
    if split == 'pos':
30
        corr = corr.where((corr >= threshold) & (corr > 0))
31
        print('Displaying positive correlations. Use "threshold" to further limit the results.')
32
    elif split == 'neg':
33
        corr = corr.where((corr <= threshold) & (corr < 0))
34
        print('Displaying negative correlations. Use "threshold" to further limit the results.')
35
    elif split == 'high':
36
        corr = corr.where(np.abs(corr) >= threshold)
37
        print('Displaying absolute correlations above a chosen threshold.')
38
    elif split == 'low':
39
        corr = corr.where(np.abs(corr) <= threshold)
40
        print('Displaying absolute correlations below a chosen threshold.')
41
    else:
42
        corr = corr
43
44
    return corr
45
46
47
def _drop_duplicates(data):
48
    '''
49
    Provides information and drops duplicate rows.
50
51
    Parameters
52
    ----------
53
    data: 2D dataset that can be coerced into Pandas DataFrame.
54
55
    Returns
56
    -------
57
    data: Deduplicated Pandas DataFrame
58
    rows_dropped: Index Object of rows dropped.
59
    '''
60
61
    data = pd.DataFrame(data).copy()
62
    rows_dropped = data[data.duplicated()].index
63
    data = data.drop_duplicates()
64
65
    return data, rows_dropped
66
67
68
def _memory_usage(data):
69
    '''
70
    Gives the total memory usage in kilobytes.
71
72
    Parameters
73
    ----------
74
    data: 2D dataset that can be coerced into Pandas DataFrame.
75
76
    Returns
77
    -------
78
    memory_usage: float
79
    '''
80
81
    data = pd.DataFrame(data).copy()
82
    memory_usage = round(data.memory_usage(index=True, deep=True).sum()/1024, 2)
83
84
    return memory_usage
85
86
87
def _missing_vals(data):
88
    '''
89
    Gives metrics of missing values in the dataset.
90
91
    Parameters
92
    ----------
93
    data: 2D dataset that can be coerced into Pandas DataFrame.
94
95
    Returns
96
    -------
97
    mv_total: float, number of missing values in the entire dataset
98
    mv_rows: float, number of missing values in each row
99
    mv_cols: float, number of missing values in each column
100
    mv_rows_ratio: float, ratio of missing values for each row
101
    mv_cols_ratio: float, ratio of missing values for each column
102
    '''
103
104
    data = pd.DataFrame(data).copy()
105
    mv_rows = data.isna().sum(axis=1)
106
    mv_cols = data.isna().sum(axis=0)
107
    mv_total = data.isna().sum().sum()
108
    mv_rows_ratio = mv_rows/data.shape[1]
109
    mv_cols_ratio = mv_cols/data.shape[0]
110
111
    return {'mv_total': mv_total,
112
            'mv_rows': mv_rows,
113
            'mv_cols': mv_cols,
114
            'mv_rows_ratio': mv_rows_ratio,
115
            'mv_cols_ratio': mv_cols_ratio}
116
117
118
def _validate_input_0_1(value, desc):
119
    if value < 0 or value > 1:
120
        raise ValueError(f'Input value for {desc} is {value} but should be a float in the range 0 <= {desc} <=1.')
121
122
123
def _validate_input_bool(value, desc):
124
    if not(isinstance(value, bool)):
125
        raise ValueError(f'Input value for {desc} is {value} but should be boolean.')
126