| Conditions | 5 |
| Total Lines | 121 |
| Code Lines | 77 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment itself is a useful starting point when coming up with a good name for that new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | """ |
||
def _load_epoch_indexed_csv(url, csv_path, index_col):
    """Load a CSV whose index holds Unix timestamps in seconds.

    The file is downloaded from ``url`` to ``csv_path`` first if it does not
    exist yet. The index is converted to a timezone-aware (UTC)
    ``DatetimeIndex``.
    """
    if not csv_path.exists():
        urlretrieve(url, csv_path)
    frame = pd.read_csv(csv_path, index_col=index_col)
    frame.index = pd.to_datetime(frame.index, unit="s", utc=True)
    return frame


def _regularise_to_five_minutes(df):
    """Return ``df`` on a gap-free 5-minute index with filled numeric columns."""
    # 1) Replace duplicated timestamps by the mean of their rows.
    df = df.groupby(df.index).mean()

    # 2) Build a regular 5-minute index (keeping the time zone).
    full_idx = pd.date_range(
        start=df.index.min(),
        end=df.index.max(),
        freq="5min",
        tz=df.index.tz,
    )

    # 3) Reindex onto the 5-minute grid -> gaps become NaN.
    df_regular = df.reindex(full_idx)

    # 4) Time-based interpolation (respects the time distances in the
    #    index), applied to numeric columns only.
    num_cols = df_regular.select_dtypes(include="number").columns
    df_regular[num_cols] = df_regular[num_cols].interpolate(method="time")

    # 5) Close edges that lack a neighbour on both sides via ffill/bfill.
    df_regular[num_cols] = df_regular[num_cols].ffill().bfill()

    return df_regular


def _add_heat_demand(df_temperature):
    """Add the columns "heat demand (kWh)" and "heat demand (W)" in place."""
    building_area = 120  # m² (from publication)
    specific_heat_demand = 60  # kWh/m²/a (educated guess)
    holidays = dict(Germany().holidays(2019))

    # We estimate the heat demand from the ambient temperature using demandlib.
    # This returns energy per time step in units of kWh.
    df_temperature["heat demand (kWh)"] = demandlib.bdew.HeatBuilding(
        df_temperature.index,
        holidays=holidays,
        temperature=df_temperature["Air Temperature (°C)"],
        shlp_type="EFH",
        building_class=1,
        wind_class=1,
        annual_heat_demand=building_area * specific_heat_demand,
        name="EFH",
    ).get_bdew_profile()

    # kWh per 5-minute step -> Wh (* 1e3) -> mean power in W (/ (5 / 60) h).
    df_temperature["heat demand (W)"] = (
        df_temperature["heat demand (kWh)"] * 1e3 / (5 / 60)
    )


def _plot_pv_resampling(pv_power):
    """Plot ``pv_power`` (in W), sorted descending, for several resolutions."""
    resolutions = [
        "1 min",
        "5 min",
        "10 min",
        "15 min",
        "30 min",
        "1 h",
        "2 h",
        "3 h",
        "6 h",
    ]

    for resolution in resolutions:
        resampled = pv_power.resample(resolution).mean()
        # sort_values keeps NaN (e.g. from empty bins) at the end instead of
        # leaving the order undefined, as a plain sorted() would.
        descending_kw = resampled.sort_values(ascending=False).to_numpy() / 1e3
        plt.plot(
            # x axis spans 0..8760 (presumably the hours of one year).
            np.linspace(0, 8760, len(resampled)),
            descending_kw,
            label=resolution,
        )

    # Zoom into the start of the sorted curves, i.e. the largest values.
    plt.xlim(-10, 510)
    plt.ylim(7, 16)
    plt.legend()
    plt.show()


def prepare_input_data(plot_resampling=False):
    """Load the temperature and energy time series used as input data.

    Both CSV files are stored next to this module and are downloaded on
    first use. The temperature series is cleaned up onto a regular 5-minute
    grid and extended by an estimated heat demand. A licence notice for the
    data is printed on every call.

    Parameters
    ----------
    plot_resampling : bool, default False
        If True, additionally plot the sorted "PV (W)" series resampled to
        various time resolutions (opens a matplotlib window).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_temperature, df_energy)``, both indexed by UTC timestamps.
    """
    url_temperature = (
        "https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
    )
    url_energy = "https://oemof.org/wp-content/uploads/2025/12/energy.csv"

    print(
        "Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
        " and M. Knoop. Under Creative Commons Attribution 4.0 International"
        " License. It is also available at doi: 10.5281/zenodo.5642902."
        " (We use single family home 26 plus the south-facing PV"
        " from that dataset.)"
    )

    file_path = Path(__file__).parent

    df_temperature = _load_epoch_indexed_csv(
        url_temperature,
        Path(file_path, "temperature.csv"),
        index_col="Unix Epoch",
    )
    df_temperature = _regularise_to_five_minutes(df_temperature)
    _add_heat_demand(df_temperature)

    df_energy = _load_epoch_indexed_csv(
        url_energy,
        Path(file_path, "energy.csv"),
        index_col=0,
    )

    if plot_resampling:
        _plot_pv_resampling(df_energy["PV (W)"])

    return df_temperature, df_energy
||
| 139 | |||
| 143 |