| 1 |  |  | # --- | 
            
                                                                        
                            
            
                                    
            
            
                | 2 |  |  | # jupyter: | 
            
                                                                        
                            
            
                                    
            
            
                | 3 |  |  | #   jupytext: | 
            
                                                                        
                            
            
                                    
            
            
                | 4 |  |  | #     text_representation: | 
            
                                                                        
                            
            
                                    
            
            
                | 5 |  |  | #       extension: .py | 
            
                                                                        
                            
            
                                    
            
            
                | 6 |  |  | #       format_name: percent | 
            
                                                                        
                            
            
                                    
            
            
                | 7 |  |  | #       format_version: '1.1' | 
            
                                                                        
                            
            
                                    
            
            
                | 8 |  |  | #       jupytext_version: 0.8.5 | 
            
                                                                        
                            
            
                                    
            
            
                | 9 |  |  | #   kernelspec: | 
            
                                                                        
                            
            
                                    
            
            
                | 10 |  |  | #     display_name: Python 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 11 |  |  | #     language: python | 
            
                                                                        
                            
            
                                    
            
            
                | 12 |  |  | #     name: python3 | 
            
                                                                        
                            
            
                                    
            
            
                | 13 |  |  | #   language_info: | 
            
                                                                        
                            
            
                                    
            
            
                | 14 |  |  | #     codemirror_mode: | 
            
                                                                        
                            
            
                                    
            
            
                | 15 |  |  | #       name: ipython | 
            
                                                                        
                            
            
                                    
            
            
                | 16 |  |  | #       version: 3 | 
            
                                                                        
                            
            
                                    
            
            
                | 17 |  |  | #     file_extension: .py | 
            
                                                                        
                            
            
                                    
            
            
                | 18 |  |  | #     mimetype: text/x-python | 
            
                                                                        
                            
            
                                    
            
            
                | 19 |  |  | #     name: python | 
            
                                                                        
                            
            
                                    
            
            
                | 20 |  |  | #     nbconvert_exporter: python | 
            
                                                                        
                            
            
                                    
            
            
                | 21 |  |  | #     pygments_lexer: ipython3 | 
            
                                                                        
                            
            
                                    
            
            
                | 22 |  |  | #     version: 3.6.6 | 
            
                                                                        
                            
            
                                    
            
            
                | 23 |  |  | # --- | 
            
                                                                        
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 25 |  |  | # %% {"tags": ["parameters"]} | 
            
                                                                        
                            
            
                                    
            
            
                | 26 |  |  | # Our default parameters | 
            
                                                                        
                            
            
                                    
            
            
                | 27 |  |  | # This cell has a "parameters" tag, means that it defines the parameters for use in the notebook | 
            
                                                                        
                            
            
                                    
            
            
                | 28 |  |  | run_date = '2018-11-18' | 
            
                                                                        
                            
            
                                    
            
            
                | 29 |  |  | source_id = 'sensor1' | 
            
                                                                        
                            
            
                                    
            
            
                | 30 |  |  | nb_days = 32 | 
            
                                                                        
                            
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 32 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 33 |  |  | import numpy as np | 
            
                                                                        
                            
            
                                    
            
            
                | 34 |  |  | import pandas as pd | 
            
                                                                        
                            
            
                                    
            
            
                | 35 |  |  | import papermill as pm | 
            
                                                                        
                            
            
                                    
            
            
                | 36 |  |  | import matplotlib.pyplot as plt | 
            
                                                                        
                            
            
                                    
            
            
                | 37 |  |  | from datetime import datetime, timedelta | 
            
                                                                        
                            
            
                                    
            
            
                | 38 |  |  | import os | 
            
                                                                        
                            
            
                                    
            
            
                | 39 |  |  | import statsmodels.api as sm | 
            
                                                                        
                            
            
                                    
            
            
                | 40 |  |  | from pylab import rcParams | 
            
                                                                        
                            
            
                                    
            
            
                | 41 |  |  | import itertools | 
            
                                                                        
                            
            
                                    
            
            
                | 42 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 43 |  |  | # turn off interactive plotting to avoid double plotting | 
            
                                                                        
                            
            
                                    
            
            
                | 44 |  |  | plt.ioff() | 
            
                                                                        
                            
            
                                    
            
            
                | 45 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 46 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 47 |  |  | data_dir = "../data/input/step1"  | 
            
                                                                        
                            
            
                                    
            
            
                | 48 |  |  | data = None | 
            
                                                                        
                            
            
                                    
            
            
                | 49 |  |  | run_datetime = datetime.strptime(run_date, '%Y-%m-%d') | 
            
                                                                        
                            
            
                                    
            
            
                | 50 |  |  | for i in range(nb_days):     | 
            
                                                                        
                            
            
                                    
            
            
                | 51 |  |  |     deltatime = run_datetime - timedelta(i) | 
            
                                                                        
                            
            
                                    
            
            
                | 52 |  |  |     month_partition = deltatime.strftime("%Y-%m") | 
            
                                                                        
                            
            
                                    
            
            
                | 53 |  |  |     delta = datetime.strftime(deltatime, '%Y-%m-%d')     | 
            
                                                                        
                            
            
                                    
            
            
                | 54 |  |  |     file = os.path.join(data_dir, month_partition, delta + "-" + source_id + ".csv") | 
            
                                                                        
                            
            
                                    
            
            
                | 55 |  |  |     if os.path.exists(file): | 
            
                                                                        
                            
            
                                    
            
            
                | 56 |  |  |         print("Loading " + file) | 
            
                                                                        
                            
            
                                    
            
            
                | 57 |  |  |         new = pd.read_csv(file) | 
            
                                                                        
                            
            
                                    
            
            
                | 58 |  |  |         if data is not None: | 
            
                                                                        
                            
            
                                    
            
            
                | 59 |  |  |             data = pd.concat([data, new]) | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  |         else: | 
            
                                                                        
                            
            
                                    
            
            
                | 61 |  |  |             data = new | 
            
                                                                        
                            
            
                                    
            
            
                | 62 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 64 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 65 |  |  | data['date'] = data['date'].apply(lambda x : datetime.strptime(x, "%Y-%m-%d %H:%M:%S")) | 
            
                                                                        
                            
            
                                    
            
            
                | 66 |  |  | print(data['date'].describe()) | 
            
                                                                        
                            
            
                                    
            
            
                | 67 |  |  | data.describe() | 
            
                                                                        
                            
            
                                    
            
            
                | 68 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 70 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 71 |  |  | data = data.sort_values('date').set_index('date', drop=True) | 
            
                                                                        
                            
            
                                    
            
            
                | 72 |  |  | data = data.asfreq(freq="5min") | 
            
                                                                        
                            
            
                                    
            
            
                | 73 |  |  | data.head(5) | 
            
                                                                        
                            
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 75 |  |  | # %% [markdown] | 
            
                                                                        
                            
            
                                    
            
            
                | 76 |  |  | # https://towardsdatascience.com/an-end-to-end-project-on-time-series-analysis-and-forecasting-with-python-4835e6bf050b | 
            
                                                                        
                            
            
                                    
            
            
                | 77 |  |  | # | 
            
                                                                        
                            
            
                                    
            
            
                | 78 |  |  | # | 
            
                                                                        
                            
            
                                    
            
            
                | 79 |  |  | # https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.seasonal_decompose.html | 
            
                                                                        
                            
            
                                    
            
            
                | 80 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 81 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 82 |  |  | rcParams['figure.figsize'] = 18, 8 | 
            
                                                                        
                            
            
                                    
            
            
                | 83 |  |  | decomposition = sm.tsa.seasonal_decompose(data['mydata'], model='additive', freq=288) | 
            
                                                                        
                            
            
                                    
            
            
                | 84 |  |  | fig = decomposition.plot() | 
            
                                                                        
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 86 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 87 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 88 |  |  | p = d = q = range(0, 2) | 
            
                                                                        
                            
            
                                    
            
            
                | 89 |  |  | pdq = list(itertools.product(p, d, q)) | 
            
                                                                        
                            
            
                                    
            
            
                | 90 |  |  | seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))] | 
            
                                                                        
                            
            
                                    
            
            
                | 91 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 92 |  |  | print('Examples of parameter combinations for Seasonal ARIMA...') | 
            
                                                                        
                            
            
                                    
            
            
                | 93 |  |  | print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1])) | 
            
                                                                        
                            
            
                                    
            
            
                | 94 |  |  | print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2])) | 
            
                                                                        
                            
            
                                    
            
            
                | 95 |  |  | print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3])) | 
            
                                                                        
                            
            
                                    
            
            
                | 96 |  |  | print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4])) | 
            
                                                                        
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 98 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 99 |  |  | scores = { | 
            
                                                                        
                            
            
                                    
            
            
                | 100 |  |  |     "AIC" : [], | 
            
                                                                        
                            
            
                                    
            
            
                | 101 |  |  |     "param" : [], | 
            
                                                                        
                            
            
                                    
            
            
                | 102 |  |  |     "param_seasonal" : [] | 
            
                                                                        
                            
            
                                    
            
            
                | 103 |  |  | } | 
            
                                                                        
                            
            
                                    
            
            
                | 104 |  |  | for param in pdq: | 
            
                                                                        
                            
            
                                    
            
            
                | 105 |  |  |     for param_seasonal in seasonal_pdq: | 
            
                                                                        
                            
            
                                    
            
            
                | 106 |  |  |         try: | 
            
                                                                        
                            
            
                                    
            
            
                | 107 |  |  |             mod = sm.tsa.statespace.SARIMAX( | 
            
                                                                        
                            
            
                                    
            
            
                | 108 |  |  |                 data['mydata'], | 
            
                                                                        
                            
            
                                    
            
            
                | 109 |  |  |                 order=param, | 
            
                                                                        
                            
            
                                    
            
            
                | 110 |  |  |                 seasonal_order=param_seasonal, | 
            
                                                                        
                            
            
                                    
            
            
                | 111 |  |  |                 enforce_stationarity=False, | 
            
                                                                        
                            
            
                                    
            
            
                | 112 |  |  |                 enforce_invertibility=False | 
            
                                                                        
                            
            
                                    
            
            
                | 113 |  |  |             ) | 
            
                                                                        
                            
            
                                    
            
            
                | 114 |  |  |             results = mod.fit() | 
            
                                                                        
                            
            
                                    
            
            
                | 115 |  |  |             scores['AIC'].append(results.aic) | 
            
                                                                        
                            
            
                                    
            
            
                | 116 |  |  |             scores['param'].append(param) | 
            
                                                                        
                            
            
                                    
            
            
                | 117 |  |  |             scores['param_seasonal'].append(param_seasonal) | 
            
                                                                        
                            
            
                                    
            
            
                | 118 |  |  |             print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic)) | 
            
                                                                        
                            
            
                                    
            
            
                | 119 |  |  |         except: | 
            
                                                                        
                            
            
                                    
            
            
                | 120 |  |  |             continue | 
            
                                                                        
                            
            
                                    
            
            
                | 121 |  |  | scores = pd.DataFrame.from_dict(scores) | 
            
                                                                        
                            
            
                                    
            
            
                | 122 |  |  | scores.sort_values('AIC').head(5) | 
            
                                                                        
                            
            
                                    
            
            
                | 123 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 124 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 125 |  |  | best = scores.sort_values('AIC').head(1).values[0] | 
            
                                                                        
                            
            
                                    
            
            
                | 126 |  |  | mod = sm.tsa.statespace.SARIMAX(data['mydata'], | 
            
                                                                        
                            
            
                                    
            
            
                | 127 |  |  |                                 order=best[1], | 
            
                                                                        
                            
            
                                    
            
            
                | 128 |  |  |                                 seasonal_order=best[2], | 
            
                                                                        
                            
            
                                    
            
            
                | 129 |  |  |                                 enforce_stationarity=True, | 
            
                                                                        
                            
            
                                    
            
            
                | 130 |  |  |                                 enforce_invertibility=False) | 
            
                                                                        
                            
            
                                    
            
            
                | 131 |  |  | results = mod.fit() | 
            
                                                                        
                            
            
                                    
            
            
                | 132 |  |  | print(results.summary().tables[1]) | 
            
                                                                        
                            
            
                                    
            
            
                | 133 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 134 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 135 |  |  | results.plot_diagnostics(figsize=(16, 8)) | 
            
                                                                        
                            
            
                                    
            
            
                | 136 |  |  | plt.show() | 
            
                                                                        
                            
            
                                    
            
            
                | 137 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 138 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 139 |  |  | pred = results.get_prediction(start=(run_datetime - timedelta(1)), dynamic=False) | 
            
                                                                        
                            
            
                                    
            
            
                | 140 |  |  | pred_ci = pred.conf_int() | 
            
                                                                        
                            
            
                                    
            
            
                | 141 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 142 |  |  | fig, ax = plt.subplots() | 
            
                                                                        
                            
            
                                    
            
            
                | 143 |  |  | ax.plot(data[data.index > (run_datetime - timedelta(3))]['mydata'], label='observed') | 
            
                                                                        
                            
            
                                    
            
            
                | 144 |  |  | ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7) | 
            
                                                                        
                            
            
                                    
            
            
                | 145 |  |  | ax.fill_between(pred_ci.index, | 
            
                                                                        
                            
            
                                    
            
            
                | 146 |  |  |                 pred_ci.iloc[:, 0], | 
            
                                                                        
                            
            
                                    
            
            
                | 147 |  |  |                 pred_ci.iloc[:, 1], color='k', alpha=.2) | 
            
                                                                        
                            
            
                                    
            
            
                | 148 |  |  | ax.set_xlabel('Date') | 
            
                                                                        
                            
            
                                    
            
            
                | 149 |  |  | ax.set_ylabel('mydata') | 
            
                                                                        
                            
            
                                    
            
            
                | 150 |  |  | ax.set(title='Results of ARIMA{}x{}12 - AIC:{} on {}'.format(best[1], best[2], round(best[0]), run_date)) | 
            
                                                                        
                            
            
                                    
            
            
                | 151 |  |  | fig.legend() | 
            
                                                                        
                            
            
                                    
            
            
                | 152 |  |  | pm.display('arima_results_fig', fig) | 
            
                                                                        
                            
            
                                    
            
            
                | 153 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 154 |  |  | # %% | 
            
                                                                        
                            
            
                                    
            
            
                | 155 |  |  | pred.save("../data/output/step2/prediction_model_" + run_date + "-" + source_id) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  | # %% | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 160 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 161 |  |  |  |