1
|
|
|
"""Measuring the performance of key functionality. |
2
|
|
|
|
3
|
|
|
:author: Andreas Kanz |
4
|
|
|
""" |
5
|
|
|
|
6
|
|
|
import functools |
7
|
|
|
from pathlib import Path |
8
|
|
|
from time import perf_counter |
9
|
|
|
|
10
|
|
|
import matplotlib.pyplot as plt |
11
|
|
|
import pandas as pd |
12
|
|
|
|
13
|
|
|
import klib |
14
|
|
|
|
15
|
|
|
# Locations: start from the fourth ancestor directory of this (resolved) file.
base_path = Path(__file__).resolve().parents[3]
print(base_path)
data_path = base_path.joinpath("examples")

# Load the dataset once at import time; every timed function below reuses it.
filepath = data_path.joinpath("NFL_DATASET.csv")
data = pd.read_csv(filepath)
23
|
|
|
|
24
|
|
|
|
25
|
|
|
def timer(func):
    """Decorate ``func`` so that calling it returns the elapsed wall time.

    The decorated callable forwards all positional and keyword arguments to
    ``func``, discards whatever ``func`` returns, and instead returns the
    duration of the call in seconds as measured with ``perf_counter``.
    """

    @functools.wraps(func)
    def timed_call(*args, **kwargs):
        started_at = perf_counter()
        func(*args, **kwargs)  # result intentionally ignored; only time matters
        finished_at = perf_counter()
        return finished_at - started_at

    return timed_call
33
|
|
|
|
34
|
|
|
|
35
|
|
|
@timer
def time_data_cleaning():
    """Run ``klib.data_cleaning`` once on the module-level dataset.

    Because of the ``timer`` decorator, calling this function returns the
    elapsed time of the run in seconds rather than the cleaned data.
    """
    klib.data_cleaning(data, show=None)
38
|
|
|
|
39
|
|
|
|
40
|
|
|
@timer
def time_missingval_plot():
    """Run ``klib.missingval_plot`` once on the module-level dataset.

    Because of the ``timer`` decorator, calling this function returns the
    elapsed time of the run in seconds.
    """
    klib.missingval_plot(data)
43
|
|
|
|
44
|
|
|
|
45
|
|
|
@timer
def time_dist_plot():
    """Run ``klib.dist_plot`` once on the first five columns of the dataset.

    Because of the ``timer`` decorator, calling this function returns the
    elapsed time of the run in seconds. Selecting the columns is part of the
    timed work.
    """
    first_five_columns = data.iloc[:, :5]
    klib.dist_plot(first_five_columns)
48
|
|
|
|
49
|
|
|
|
50
|
|
|
@timer
def time_cat_plot():
    """Run ``klib.cat_plot`` once on the module-level dataset.

    Because of the ``timer`` decorator, calling this function returns the
    elapsed time of the run in seconds.
    """
    klib.cat_plot(data)
53
|
|
|
|
54
|
|
|
|
55
|
|
|
def main():
    """Time the key klib functions repeatedly and save a boxplot summary.

    Data cleaning is timed 12 times and each plotting function 7 times. The
    measured durations (seconds, as returned by the ``timer``-decorated
    functions) are drawn as one boxplot per benchmark, each with a horizontal
    reference line, and the figure is written to ``boxplots.png`` in the
    current working directory.
    """
    # benchmark name -> (timed callable, number of runs, reference line value)
    benchmarks = {
        "data_cleaning": (time_data_cleaning, 12, 5),
        "missingval_plot": (time_missingval_plot, 7, 10),
        "dist_plot": (time_dist_plot, 7, 10),
        "cat_plot": (time_cat_plot, 7, 10),
    }

    # Keep the raw timings of each benchmark on their own. Forcing them into
    # one rectangular table and filling the shorter runs with their mean would
    # add artificial samples and narrow the boxes and whiskers.
    timings = {
        name: [timed() for _ in range(runs)]
        for name, (timed, runs, _) in benchmarks.items()
    }

    # NOTE(review): the timed plotting calls presumably leave their figures
    # registered with pyplot; release them before building the summary figure.
    plt.close("all")

    fig, axes = plt.subplots(
        nrows=1,
        ncols=len(benchmarks),
        figsize=(14, 7),
        squeeze=False,  # always a 2-D array, so iteration works for any count
    )
    for ax, (name, (_, _, reference)) in zip(
        axes.flat,
        benchmarks.items(),
        strict=True,
    ):
        ax.boxplot(timings[name])
        ax.set_title(" ".join(name.split("_")).title())
        ax.axhline(reference)
    fig.suptitle("Performance", fontsize=16)
    fig.savefig("boxplots.png")
    # The figure only exists to be written to disk; free it afterwards.
    plt.close(fig)
73
|
|
|
|
74
|
|
|
|
75
|
|
|
# Only run the benchmarks when this file is executed directly as a script.
if __name__ == "__main__":
    main()
77
|
|
|
|