"""
Measuring the performance of key functionality.

:author: Andreas Kanz
"""
6
|
|
|
|
7
|
|
|
import functools |
8
|
|
|
import pandas as pd |
9
|
|
|
from pathlib import Path |
10
|
|
|
from time import perf_counter |
11
|
|
|
import matplotlib.pyplot as plt |
12
|
|
|
|
13
|
|
|
import klib |
14
|
|
|
|
15
|
|
|
# Paths
# base_path is two levels above the directory containing this file
# (presumably the repository root - confirm against the project layout).
base_path = Path(__file__).resolve().parents[2]
print(base_path)
data_path = base_path.joinpath("examples")
export_path = base_path.joinpath("klib/scripts/")

# Data Import
# The dataset is read once at import time and shared by every timed function.
filepath = data_path.joinpath("NFL_DATASET.csv")
data = pd.read_csv(filepath)
24
|
|
|
|
25
|
|
|
|
26
|
|
|
def timer(func):
    """Decorate ``func`` so that calling it returns its elapsed run time.

    The decorated callable forwards all positional and keyword arguments to
    ``func``, discards whatever ``func`` returns, and instead returns the
    difference between two ``time.perf_counter`` readings taken immediately
    before and after the call (fractional seconds).
    """

    @functools.wraps(func)
    def timed(*call_args, **call_kwargs):
        started_at = perf_counter()
        func(*call_args, **call_kwargs)
        return perf_counter() - started_at

    return timed
35
|
|
|
|
36
|
|
|
|
37
|
|
|
@timer
def time_data_cleaning():
    """Run ``klib.data_cleaning`` on the module-level dataset with ``show=None``.

    Because of the ``@timer`` decorator, calling this returns the elapsed
    time of the call rather than the cleaned data.
    """
    klib.data_cleaning(data, show=None)
40
|
|
|
|
41
|
|
|
|
42
|
|
|
@timer
def time_missingval_plot():
    """Run ``klib.missingval_plot`` on the module-level dataset.

    Because of the ``@timer`` decorator, calling this returns the elapsed
    time of the call rather than the plot object.
    """
    klib.missingval_plot(data)
45
|
|
|
|
46
|
|
|
|
47
|
|
|
@timer
def time_dist_plot():
    """Run ``klib.dist_plot`` on the first five columns of the dataset.

    Because of the ``@timer`` decorator, calling this returns the elapsed
    time of the call rather than the plot object.
    """
    klib.dist_plot(data.iloc[:, :5])
50
|
|
|
|
51
|
|
|
|
52
|
|
|
@timer
def time_cat_plot():
    """Run ``klib.cat_plot`` on the module-level dataset.

    Because of the ``@timer`` decorator, calling this returns the elapsed
    time of the call rather than the plot object.
    """
    klib.cat_plot(data)
55
|
|
|
|
56
|
|
|
|
57
|
|
|
def main():
    """Time the key klib functions repeatedly and save the results as boxplots.

    Each benchmark is run a fixed number of times and its durations are
    stored in one column of a DataFrame. One boxplot per column is drawn next
    to a horizontal reference line and the figure is written to
    ``boxplots.png`` in the current working directory.
    """
    # (column name, timed function, number of repetitions)
    benchmarks = (
        ("data_cleaning", time_data_cleaning, 12),
        ("missingval_plot", time_missingval_plot, 7),
        ("dist_plot", time_dist_plot, 7),
        ("cat_plot", time_cat_plot, 7),
    )

    df_times = pd.DataFrame()
    for column, benchmark, repetitions in benchmarks:
        durations = []
        for _ in range(repetitions):
            durations.append(benchmark())
            # Release any pyplot figures the benchmarked call left open.
            # This happens after the timer has stopped, so measurements are
            # unaffected, but it keeps figures from piling up across runs.
            plt.close("all")
        df_times[column] = pd.Series(durations)

    # The first column fixes the index length (12 rows); shorter columns are
    # aligned on that index and padded with NaN. Fill the padding with the
    # column mean so every boxplot receives a complete column.
    df_times = df_times.fillna(df_times.mean())

    fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(14, 7))
    # Reference duration drawn as a horizontal line in each subplot.
    reference_values = [5, 10, 10, 10]

    for axis, col, ref in zip(ax, df_times.columns, reference_values):
        axis.boxplot(df_times[col])
        axis.set_title(" ".join(col.split("_")).title())
        axis.axhline(ref)

    fig.suptitle("Performance", fontsize=16)
    fig.savefig("boxplots.png")
    # The figure has been written to disk; free it.
    plt.close(fig)
73
|
|
|
|
74
|
|
|
|
75
|
|
|
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
77
|
|
|
|