1
|
|
|
# --- |
2
|
|
|
# jupyter: |
3
|
|
|
# jupytext: |
4
|
|
|
# text_representation: |
5
|
|
|
# extension: .py |
6
|
|
|
# format_name: percent |
7
|
|
|
# format_version: '1.1' |
8
|
|
|
# jupytext_version: 0.8.5 |
9
|
|
|
# kernelspec: |
10
|
|
|
# display_name: Python 3 |
11
|
|
|
# language: python |
12
|
|
|
# name: python3 |
13
|
|
|
# language_info: |
14
|
|
|
# codemirror_mode: |
15
|
|
|
# name: ipython |
16
|
|
|
# version: 3 |
17
|
|
|
# file_extension: .py |
18
|
|
|
# mimetype: text/x-python |
19
|
|
|
# name: python |
20
|
|
|
# nbconvert_exporter: python |
21
|
|
|
# pygments_lexer: ipython3 |
22
|
|
|
# version: 3.6.6 |
23
|
|
|
# --- |
24
|
|
|
|
25
|
|
|
# %% {"tags": ["parameters"]} |
26
|
|
|
# Default parameter values.
# This cell carries the "parameters" tag, which marks the assignments below as
# the notebook's parameters; keep them as plain `name = literal` assignments.
# run_date  - reference day of the run, formatted as YYYY-MM-DD
# source_id - identifier that appears in the input and model file names
# nb_days   - how many days of input to load, counting back from run_date
run_date = "2018-11-18"
source_id = "sensor1"
nb_days = 7
31
|
|
|
|
32
|
|
|
# %% |
33
|
|
|
import statsmodels.api as sm |
34
|
|
|
import matplotlib.pyplot as plt |
35
|
|
|
from datetime import datetime, timedelta |
36
|
|
|
import os |
37
|
|
|
import pandas as pd |
38
|
|
|
|
39
|
|
|
from pylab import rcParams |
40
|
|
|
import papermill as pm |
41
|
|
|
|
42
|
|
|
# %% |
43
|
|
|
# Load the daily CSV files for the `nb_days` days ending on `run_date`
# (inclusive) and stack them into a single DataFrame named `data`.
# Files are expected at <data_dir>/<YYYY-MM>/<YYYY-MM-DD>-<source_id>.csv;
# days without a file are skipped.
data_dir = "../data/input/step1"
run_datetime = datetime.strptime(run_date, '%Y-%m-%d')

# Collect the frames first and concatenate once at the end: calling
# pd.concat inside the loop would re-copy everything loaded so far each time.
frames = []
for day_offset in range(nb_days):
    day = run_datetime - timedelta(days=day_offset)
    month_partition = day.strftime("%Y-%m")
    day_label = day.strftime('%Y-%m-%d')
    csv_path = os.path.join(data_dir, month_partition,
                            day_label + "-" + source_id + ".csv")
    if os.path.exists(csv_path):
        print("Loading " + csv_path)
        frames.append(pd.read_csv(csv_path))

# Fail here with an explicit message instead of leaving `data` as None and
# hitting an opaque TypeError in the next cell.
if not frames:
    raise FileNotFoundError(
        "No input file found for source {} in the {} day(s) up to {} under {}".format(
            source_id, nb_days, run_date, data_dir))

data = pd.concat(frames)
58
|
|
|
|
59
|
|
|
# %% |
60
|
|
|
# Convert the 'date' column from "YYYY-MM-DD HH:MM:SS" strings to timestamps.
# pd.to_datetime parses the whole column in one vectorized call instead of
# running datetime.strptime once per row through apply().
# NOTE(review): a missing value in 'date' now becomes NaT rather than raising;
# strings that do not match the format still raise.
data['date'] = pd.to_datetime(data['date'], format="%Y-%m-%d %H:%M:%S")
print(data['date'].describe())
# Last expression of the cell: summary statistics shown as the cell output.
data.describe()
63
|
|
|
|
64
|
|
|
# %% |
65
|
|
|
# Put the rows in chronological order, then use the timestamps as the index
# (the 'date' column is removed from the columns once it becomes the index).
data = data.sort_values(by='date')
data = data.set_index('date')
# Re-index onto a regular 5-minute grid.
data = data.asfreq("5min")
# Last expression of the cell: preview the first five rows.
data.head()
68
|
|
|
|
69
|
|
|
# %% |
70
|
|
|
# Load the saved prediction object for this run date and source.
# NOTE(review): sm.load unpickles the file - only point it at artifacts from
# a trusted location.
model_path = "../data/input/step2/prediction_model_" + run_date + "-" + source_id
pred = sm.load(model_path)
71
|
|
|
|
72
|
|
|
# %% |
73
|
|
|
# Plot the recent observations against the loaded forecast and its
# confidence band.
pred_ci = pred.conf_int()
rcParams['figure.figsize'] = 18, 8
fig, ax = plt.subplots()

# Observed series: only rows strictly after (run_date - 3 days) are drawn.
window_start = run_datetime - timedelta(3)
observed = data.loc[data.index > window_start, 'mydata']
ax.plot(observed, label='observed')

# Forecast mean, with the band between the first two columns of pred_ci
# shaded around it.
ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=0.7)
band_low = pred_ci.iloc[:, 0]
band_high = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, band_low, band_high, color='k', alpha=0.2)

ax.set_xlabel('Date')
ax.set_ylabel('mydata')
ax.set_title('Results on {}'.format(run_date))
fig.legend()
85
|
|
|
|
86
|
|
|
# %% |
87
|
|
|
|
88
|
|
|
|
89
|
|
|
|
90
|
|
|
|