Passed
Push — master ( 5927b1...ccc1cd )
by Christophe
01:43 queued 38s
created

papermill_step1   A

Complexity

Total Complexity 0

Size/Duplication

Total Lines 75
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 0
eloc 34
dl 0
loc 75
rs 10
c 0
b 0
f 0
1
# ---
2
# jupyter:
3
#   jupytext:
4
#     text_representation:
5
#       extension: .py
6
#       format_name: percent
7
#       format_version: '1.1'
8
#       jupytext_version: 0.8.5
9
#   kernelspec:
10
#     display_name: Python 3
11
#     language: python
12
#     name: python3
13
#   language_info:
14
#     codemirror_mode:
15
#       name: ipython
16
#       version: 3
17
#     file_extension: .py
18
#     mimetype: text/x-python
19
#     name: python
20
#     nbconvert_exporter: python
21
#     pygments_lexer: ipython3
22
#     version: 3.6.6
23
# ---
24
25
# %% {"tags": ["parameters"]}
26
# Our default parameters
27
# This cell has a "parameters" tag, which means it defines the parameters used by the notebook
28
# Date to process, as a 'YYYY-MM-DD' string (parsed later with '%Y-%m-%d').
run_date = "2018-04-28"
# Identifier of the data source; it becomes part of the output file name.
source_id = "sensor1"
30
31
# %%
32
import pandas as pd
33
import numpy as np
34
import papermill as pm
35
import matplotlib.pyplot as plt
36
import matplotlib.dates as mdates
37
from datetime import datetime, timedelta
38
import time
39
import os
40
# Disable matplotlib's interactive mode for the rest of the notebook.
plt.ioff()
41
42
# %%
43
# Parse the run date parameter; the result is midnight at the start of that day.
run_datetime = datetime.strptime(run_date, '%Y-%m-%d')

# One timestamp every 5 minutes from 00:00 up to (at most) 23:59 of the run date.
# The range is anchored on run_datetime itself rather than on "today shifted
# back by N days", so the result does not depend on the wall clock and cannot
# land on the wrong day when the notebook runs across midnight.
td = pd.date_range(
    run_datetime,
    run_datetime + timedelta(hours=23, minutes=59),
    freq="5min",
)

# Synthetic signal: one random normal sample per timestamp.
data = pd.DataFrame(np.random.randn(len(td)), columns=['mydata'])
# Centered rolling mean over a 70-sample window to smooth the noise;
# min_periods=1 keeps the window edges from turning into NaN.
data = data.rolling(70, min_periods=1, center=True).mean()

data['date'] = td
# Zero-padded hour of day ("00".."23") as a string, computed vectorised.
data['hour'] = data['date'].dt.strftime("%H")
50
51
# %%
52
# Print summary statistics of the timestamp column on its own.
print(data["date"].describe())
# Summary statistics of the frame as the cell's displayed result.
data.describe()
54
55
# %%
56
# Order the rows chronologically, then promote the timestamp column to the
# index (set_index drops the column from the frame by default).
data = data.sort_values(by="date")
data = data.set_index("date")
# Preview the first five rows (head's default count).
data.head()
58
59
# %%
60
# Line plot of the smoothed signal against time for the run date.
fig, ax = plt.subplots()
# Show full date-time labels on the x axis.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
# fig is the current figure here, so this matches plt.gcf().autofmt_xdate().
fig.autofmt_xdate()
ax.plot(data.index, data['mydata'], color='k', alpha=0.5)
ax.set_title("Activity for the day of {}".format(run_date))
# Hand the figure to papermill under the name 'activity_day_fig'.
pm.display('activity_day_fig', fig)
66
67
# %%
68
# Outputs are partitioned by month: <step1 dir>/<YYYY-MM>/<run_date>-<source_id>.csv
month_partition = run_datetime.strftime("%Y-%m")
output_file = "../data/output/step1/{}/{}-{}.csv".format(
    month_partition, run_date, source_id
)
print(output_file)
71
72
# %%
73
# Create the month partition directory if it is missing, then write the
# date-indexed frame to the CSV path computed above.
output_dir = os.path.dirname(output_file)
os.makedirs(output_dir, exist_ok=True)
data.to_csv(output_file)
75
76
77