GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( cc6bfd...4f98db )
by Andreas
01:27
created

klib.utils._drop_duplicates()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 1
dl 0
loc 19
rs 10
c 0
b 0
f 0
1
'''
2
Utilities and auxiliary functions.
3
4
:author: Andreas Kanz
5
6
'''
7
8
# Imports
9
import pandas as pd
10
11
12
def _drop_duplicates(data):
    '''
    Provides information and drops duplicate rows.

    The first occurrence of every row is kept; each later repeat is removed.
    The remaining rows keep their original index labels.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    data: Deduplicated Pandas DataFrame
    rows_dropped: Index Object of rows dropped.
    '''

    data = pd.DataFrame(data)
    # Flag the repeats once and reuse the mask for both return values
    # instead of scanning the frame a second time.
    is_duplicate = data.duplicated()
    rows_dropped = data[is_duplicate].index
    # Boolean indexing builds a new frame, so the input is left untouched
    # without an up-front copy of the whole dataset.
    data = data[~is_duplicate]

    return data, rows_dropped
31
32
33
def _memory_usage(data):
    '''
    Gives the total memory usage in kilobytes.

    The index and the contents of object columns are included in the total.
    The byte count is divided by 1024 and rounded to two decimals.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    memory_usage: float
    '''

    # Measuring is read-only, so the dataset is not copied first.
    data = pd.DataFrame(data)
    total_bytes = data.memory_usage(index=True, deep=True).sum()
    memory_usage = round(total_bytes / 1024, 2)

    return memory_usage
50
51
52
def _missing_vals(data):
    '''
    Gives metrics of missing values in the dataset.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame.

    Returns
    -------
    dict with the following entries:
        mv_total: number of missing values in the entire dataset
        mv_rows: Pandas Series, number of missing values in each row
        mv_cols: Pandas Series, number of missing values in each column
        mv_rows_ratio: Pandas Series, ratio of missing values for each row
        mv_cols_ratio: Pandas Series, ratio of missing values for each column
    '''

    data = pd.DataFrame(data)
    n_rows, n_cols = data.shape

    # Build the missing-value mask once; every metric is derived from it.
    missing = data.isna()
    mv_rows = missing.sum(axis=1)
    mv_cols = missing.sum(axis=0)
    # The per-column counts already add up to the grand total.
    mv_total = mv_cols.sum()
    mv_rows_ratio = mv_rows/n_cols
    mv_cols_ratio = mv_cols/n_rows

    return {'mv_total': mv_total,
            'mv_rows': mv_rows,
            'mv_cols': mv_cols,
            'mv_rows_ratio': mv_rows_ratio,
            'mv_cols_ratio': mv_cols_ratio}
81
82
83
def _validate_input_0_1(value, desc):
    '''
    Checks that a numeric input lies in the closed interval [0, 1].

    Parameters
    ----------
    value: number to validate.
    desc: name of the validated input, used in the error message.

    Raises
    ------
    ValueError: if value is smaller than 0, larger than 1 or NaN.
    '''

    # The chained comparison is False for NaN, so NaN is rejected too;
    # separate `< 0` / `> 1` checks would let it slip through.
    if not 0 <= value <= 1:
        raise ValueError(f'Input value for {desc} is {value} but should be a float in the range 0 <= {desc} <=1.')
86
87
88
def _validate_input_bool(value, desc):
    '''
    Checks that an input is a boolean.

    Parameters
    ----------
    value: object to validate.
    desc: name of the validated input, used in the error message.

    Raises
    ------
    ValueError: if value is not an instance of bool.
    '''

    if not isinstance(value, bool):
        raise ValueError(f'Input value for {desc} is {value} but should be boolean.')
91