Passed
Pull Request — dev (#1226)
by
unknown
01:59
created

shared.prepare_input_data()   B

Complexity

Conditions 5

Size

Total Lines 121
Code Lines 77

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 77
dl 0
loc 121
rs 7.286
c 0
b 0
f 0
cc 5
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

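Commonly applied refactorings include Extract Method: move a cohesive part of the body into its own well-named function and call it from the original method.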

1
"""
2
SPDX-FileCopyrightText: Patrik Schönfeldt
3
SPDX-FileCopyrightText: DLR e.V.
4
5
SPDX-License-Identifier: MIT
6
"""
7
8
from pathlib import Path
9
from urllib.request import urlretrieve
10
11
import demandlib
12
import matplotlib.pyplot as plt
13
import numpy as np
14
import pandas as pd
15
from workalendar.europe import Germany
16
17
18
def _fetch_csv(url, target):
    """Download *url* to *target* unless *target* already exists."""
    if target.exists():
        return target
    # Download into a sibling ".part" file and rename afterwards, so that an
    # interrupted transfer cannot leave a truncated CSV behind that later
    # runs would mistake for a valid cached copy.
    partial = target.with_name(target.name + ".part")
    try:
        urlretrieve(url, partial)
        partial.replace(target)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial.unlink(missing_ok=True)
    return target


def _read_epoch_csv(path, index_col):
    """Read a CSV whose index holds Unix epoch seconds as a UTC time index."""
    frame = pd.read_csv(path, index_col=index_col)
    frame.index = pd.to_datetime(frame.index, unit="s", utc=True)
    return frame


def _regularize(frame, freq):
    """Put *frame* on a gap-free time grid of step *freq*, filling gaps."""
    # 1) Replace duplicated time stamps by the mean of their rows.
    frame = frame.groupby(frame.index).mean()

    # 2) Build the regular index (keeping the time zone of the data).
    regular_index = pd.date_range(
        start=frame.index.min(),
        end=frame.index.max(),
        freq=freq,
        tz=frame.index.tz,
    )

    # 3) Reindex onto the regular grid; missing time steps become NaN.
    regular = frame.reindex(regular_index)

    # 4) Interpolate numeric columns, weighting by the time distance in the
    #    index, then
    # 5) close edges that lack a neighbour on one side using ffill/bfill.
    num_cols = regular.select_dtypes(include="number").columns
    regular[num_cols] = (
        regular[num_cols].interpolate(method="time").ffill().bfill()
    )
    return regular


def _add_heat_demand(frame, annual_heat_demand, step_minutes):
    """Add the columns "heat demand (kWh)" and "heat demand (W)" to *frame*.

    The frame is modified in place and also returned.
    """
    # Collect the holidays of every year present in the index instead of
    # assuming one fixed year.
    calendar = Germany()
    holidays = {}
    for year in frame.index.year.unique():
        holidays.update(calendar.holidays(int(year)))

    # We estimate the heat demand from the ambient temperature using
    # demandlib. This returns energy per time step in units of kWh.
    frame["heat demand (kWh)"] = demandlib.bdew.HeatBuilding(
        frame.index,
        holidays=holidays,
        temperature=frame["Air Temperature (°C)"],
        shlp_type="EFH",
        building_class=1,
        wind_class=1,
        annual_heat_demand=annual_heat_demand,
        name="EFH",
    ).get_bdew_profile()

    # kWh per time step -> Wh (* 1e3) -> mean power in W (/ step in hours).
    frame["heat demand (W)"] = (
        frame["heat demand (kWh)"] * 1e3 / (step_minutes / 60)
    )
    return frame


def _plot_resampling(pv_power):
    """Plot descending-sorted mean PV power (kW) for several resolutions."""
    resolutions = [
        "1 min",
        "5 min",
        "10 min",
        "15 min",
        "30 min",
        "1 h",
        "2 h",
        "3 h",
        "6 h",
    ]

    for resolution in resolutions:
        mean_power = pv_power.resample(resolution).mean()
        plt.plot(
            np.linspace(0, 8760, len(mean_power)),
            # sort_values gives a well-defined order even if empty resample
            # bins produced NaN (those end up last); sorted() does not.
            (mean_power / 1e3).sort_values(ascending=False).to_numpy(),
            label=resolution,
        )

    plt.xlim(-10, 510)
    plt.ylim(7, 16)
    plt.legend()
    plt.show()


def prepare_input_data(plot_resampling=False):
    """Download (once), clean and return the example input data.

    The two CSV files are stored next to this module and re-used on later
    calls. The temperature data is put on a regular 5-minute grid and
    extended by an estimated heat demand.

    Parameters
    ----------
    plot_resampling : bool, default False
        If True, additionally show a plot of the sorted mean PV power for
        different resampling resolutions.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_temperature, df_energy)``, both with a UTC time index.
        ``df_temperature`` additionally holds the columns
        "heat demand (kWh)" and "heat demand (W)".
    """
    url_temperature = (
        "https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
    )
    url_energy = "https://oemof.org/wp-content/uploads/2025/12/energy.csv"

    print(
        "Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
        " and M. Knoop. Under Creative Commons Attribution 4.0 International"
        " License. It is also available at doi: 10.5281/zenodo.5642902."
        " (We use single family home 26 plus the south-facing PV"
        " from that dataset.)"
    )

    file_path = Path(__file__).parent
    step_minutes = 5  # time step of the regularised temperature data

    building_area = 120  # m² (from publication)
    specific_heat_demand = 60  # kWh/m²/a  (educated guess)

    temperature_file = _fetch_csv(
        url_temperature, Path(file_path, "temperature.csv")
    )
    df_temperature = _read_epoch_csv(temperature_file, "Unix Epoch")
    df_temperature = _regularize(df_temperature, f"{step_minutes}min")
    df_temperature = _add_heat_demand(
        df_temperature,
        annual_heat_demand=building_area * specific_heat_demand,
        step_minutes=step_minutes,
    )

    energy_file = _fetch_csv(url_energy, Path(file_path, "energy.csv"))
    df_energy = _read_epoch_csv(energy_file, 0)

    if plot_resampling:
        _plot_resampling(df_energy["PV (W)"])

    return df_temperature, df_energy
139
140
141
if __name__ == "__main__":
    # Executed as a script: prepare the data and show the resampling plot.
    prepare_input_data(plot_resampling=True)
143