papermill_step2 - Code Metrics - Inspection of "Merge pull request #10 from Aiscalate/dev" - Aiscalate/aiscalator - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 5927b1...ccc1cd )

by Christophe

created 2018-11-18 19:25 UTC

papermill_step2 A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	156
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	0
eloc	91
dl	0
loc	156
rs	10
c	0
b	0
f	0

# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.1'
#       jupytext_version: 0.8.5
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.6.6
# ---

# %% {"tags": ["parameters"]}
# Our default parameters
# This cell has a "parameters" tag, means that it defines the parameters for use in the notebook
run_date = '2018-11-18'
source_id = 'sensor1'
nb_days = 32

# %%
import numpy as np
import pandas as pd
import papermill as pm
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os
import statsmodels.api as sm
from pylab import rcParams
import itertools

# turn off interactive plotting to avoid double plotting
plt.ioff()

# %%
data_dir = "../data/input/step1" 
data = None
run_datetime = datetime.strptime(run_date, '%Y-%m-%d')
for i in range(nb_days):    
    deltatime = run_datetime - timedelta(i)
    month_partition = deltatime.strftime("%Y-%m")
    delta = datetime.strftime(deltatime, '%Y-%m-%d')    
    file = os.path.join(data_dir, month_partition, delta + "-" + source_id + ".csv")
    if os.path.exists(file):
        print("Loading " + file)
        new = pd.read_csv(file)
        if data is not None:
            data = pd.concat([data, new])
        else:
            data = new


# %%
data['date'] = data['date'].apply(lambda x : datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
print(data['date'].describe())
data.describe()


# %%
data = data.sort_values('date').set_index('date', drop=True)
data = data.asfreq(freq="5min")
data.head(5)

# %% [markdown]
# https://towardsdatascience.com/an-end-to-end-project-on-time-series-analysis-and-forecasting-with-python-4835e6bf050b
#
#
# https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.seasonal_decompose.html

# %%
rcParams['figure.figsize'] = 18, 8
decomposition = sm.tsa.seasonal_decompose(data['mydata'], model='additive', freq=288)
fig = decomposition.plot()


# %%
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

# %%
scores = {
    "AIC" : [],
    "param" : [],
    "param_seasonal" : []
}
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(
                data['mydata'],
                order=param,
                seasonal_order=param_seasonal,
                enforce_stationarity=False,
                enforce_invertibility=False
            )
            results = mod.fit()
            scores['AIC'].append(results.aic)
            scores['param'].append(param)
            scores['param_seasonal'].append(param_seasonal)
            print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
        except:
            continue
scores = pd.DataFrame.from_dict(scores)
scores.sort_values('AIC').head(5)

# %%
best = scores.sort_values('AIC').head(1).values[0]
mod = sm.tsa.statespace.SARIMAX(data['mydata'],
                                order=best[1],
                                seasonal_order=best[2],
                                enforce_stationarity=True,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])

# %%
results.plot_diagnostics(figsize=(16, 8))
plt.show()

# %%
pred = results.get_prediction(start=(run_datetime - timedelta(1)), dynamic=False)
pred_ci = pred.conf_int()

fig, ax = plt.subplots()
ax.plot(data[data.index > (run_datetime - timedelta(3))]['mydata'], label='observed')
ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7)
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('mydata')
ax.set(title='Results of ARIMA{}x{}12 - AIC:{} on {}'.format(best[1], best[2], round(best[0]), run_date))
fig.legend()
pm.display('arima_results_fig', fig)

# %%
pred.save("../data/output/step2/prediction_model_" + run_date + "-" + source_id)

# %%





1			# ---
2			# jupyter:
3			# jupytext:
4			# text_representation:
5			# extension: .py
6			# format_name: percent
7			# format_version: '1.1'
8			# jupytext_version: 0.8.5
9			# kernelspec:
10			# display_name: Python 3
11			# language: python
12			# name: python3
13			# language_info:
14			# codemirror_mode:
15			# name: ipython
16			# version: 3
17			# file_extension: .py
18			# mimetype: text/x-python
19			# name: python
20			# nbconvert_exporter: python
21			# pygments_lexer: ipython3
22			# version: 3.6.6
23			# ---
24
25			# %% {"tags": ["parameters"]}
26			# Our default parameters
27			# This cell has a "parameters" tag, means that it defines the parameters for use in the notebook
28			run_date = '2018-11-18'
29			source_id = 'sensor1'
30			nb_days = 32
31
32			# %%
33			import numpy as np
34			import pandas as pd
35			import papermill as pm
36			import matplotlib.pyplot as plt
37			from datetime import datetime, timedelta
38			import os
39			import statsmodels.api as sm
40			from pylab import rcParams
41			import itertools
42
43			# turn off interactive plotting to avoid double plotting
44			plt.ioff()
45
46			# %%
47			data_dir = "../data/input/step1"
48			data = None
49			run_datetime = datetime.strptime(run_date, '%Y-%m-%d')
50			for i in range(nb_days):
51			deltatime = run_datetime - timedelta(i)
52			month_partition = deltatime.strftime("%Y-%m")
53			delta = datetime.strftime(deltatime, '%Y-%m-%d')
54			file = os.path.join(data_dir, month_partition, delta + "-" + source_id + ".csv")
55			if os.path.exists(file):
56			print("Loading " + file)
57			new = pd.read_csv(file)
58			if data is not None:
59			data = pd.concat([data, new])
60			else:
61			data = new
62
63
64			# %%
65			data['date'] = data['date'].apply(lambda x : datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
66			print(data['date'].describe())
67			data.describe()
68
69
70			# %%
71			data = data.sort_values('date').set_index('date', drop=True)
72			data = data.asfreq(freq="5min")
73			data.head(5)
74
75			# %% [markdown]
76			# https://towardsdatascience.com/an-end-to-end-project-on-time-series-analysis-and-forecasting-with-python-4835e6bf050b
77			#
78			#
79			# https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
80
81			# %%
82			rcParams['figure.figsize'] = 18, 8
83			decomposition = sm.tsa.seasonal_decompose(data['mydata'], model='additive', freq=288)
84			fig = decomposition.plot()
85
86
87			# %%
88			p = d = q = range(0, 2)
89			pdq = list(itertools.product(p, d, q))
90			seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
91
92			print('Examples of parameter combinations for Seasonal ARIMA...')
93			print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
94			print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
95			print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
96			print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))
97
98			# %%
99			scores = {
100			"AIC" : [],
101			"param" : [],
102			"param_seasonal" : []
103			}
104			for param in pdq:
105			for param_seasonal in seasonal_pdq:
106			try:
107			mod = sm.tsa.statespace.SARIMAX(
108			data['mydata'],
109			order=param,
110			seasonal_order=param_seasonal,
111			enforce_stationarity=False,
112			enforce_invertibility=False
113			)
114			results = mod.fit()
115			scores['AIC'].append(results.aic)
116			scores['param'].append(param)
117			scores['param_seasonal'].append(param_seasonal)
118			print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
119			except:
120			continue
121			scores = pd.DataFrame.from_dict(scores)
122			scores.sort_values('AIC').head(5)
123
124			# %%
125			best = scores.sort_values('AIC').head(1).values[0]
126			mod = sm.tsa.statespace.SARIMAX(data['mydata'],
127			order=best[1],
128			seasonal_order=best[2],
129			enforce_stationarity=True,
130			enforce_invertibility=False)
131			results = mod.fit()
132			print(results.summary().tables[1])
133
134			# %%
135			results.plot_diagnostics(figsize=(16, 8))
136			plt.show()
137
138			# %%
139			pred = results.get_prediction(start=(run_datetime - timedelta(1)), dynamic=False)
140			pred_ci = pred.conf_int()
141
142			fig, ax = plt.subplots()
143			ax.plot(data[data.index > (run_datetime - timedelta(3))]['mydata'], label='observed')
144			ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7)
145			ax.fill_between(pred_ci.index,
146			pred_ci.iloc[:, 0],
147			pred_ci.iloc[:, 1], color='k', alpha=.2)
148			ax.set_xlabel('Date')
149			ax.set_ylabel('mydata')
150			ax.set(title='Results of ARIMA{}x{}12 - AIC:{} on {}'.format(best[1], best[2], round(best[0]), run_date))
151			fig.legend()
152			pm.display('arima_results_fig', fig)
153
154			# %%
155			pred.save("../data/output/step2/prediction_model_" + run_date + "-" + source_id)
156
157			# %%
158
159
160
161

Aiscalate / aiscalator

Push — master ( 5927b1...ccc1cd )

papermill_step2 A

Complexity

Size/Duplication

Importance

Duplication Side-by-Side

Filter issues like