Passed
Pull Request — dev (#1226)
by Patrik
01:47
created

shared.prepare_input_data()   B

Complexity

Conditions 3

Size

Total Lines 112
Code Lines 69

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 69
dl 0
loc 112
rs 8.0145
c 0
b 0
f 0
cc 3
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

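Commonly applied refactorings include Extract Method and, if many parameters or temporary variables are present, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.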

1
"""
2
SPDX-FileCopyrightText: Patrik Schönfeldt
3
SPDX-FileCopyrightText: DLR e.V.
4
5
SPDX-License-Identifier: MIT
6
"""
7
8
from pathlib import Path
9
10
import demandlib
11
import pandas as pd
12
import numpy as np
13
from urllib.request import urlretrieve
14
from workalendar.europe import Germany
15
16
17
def _download_if_missing(url, target):
    """Fetch ``url`` into ``target`` unless that file already exists."""
    if not target.exists():
        urlretrieve(url, target)


def _heat_pump_cop(t_ambient, t_supply=60, efficiency=0.5, cop_max=7):
    """Estimate the heat pump COP from the ambient temperature.

    The result is the Carnot COP between ``t_ambient`` and ``t_supply``,
    scaled by ``efficiency`` and capped at ``cop_max``.

    Parameters
    ----------
    t_ambient : pandas.Series
        Temperature of the heat source in °C.
    t_supply : float
        Supply temperature in °C.
    efficiency : float
        Factor applied to the Carnot COP.
        NOTE(review): the default of 0.5 has no cited source.
    cop_max : float
        Upper bound for the returned COP.
        NOTE(review): the default of 7 has no cited source.

    Returns
    -------
    pandas.Series
        COP per time step, aligned with ``t_ambient``.
    """
    # The Carnot COP uses the absolute supply temperature, so the Kelvin
    # offset has to be added *before* scaling by the efficiency.
    # NOTE(review): assumes t_ambient stays below t_supply; otherwise the
    # quotient turns negative (or infinite) -- confirm for other data.
    cop = (t_supply + 273.15) * efficiency / (t_supply - t_ambient)
    return cop.clip(upper=cop_max)


def prepare_input_data():
    """Download (if needed), clean and merge the example input data.

    The temperature and energy CSV files are stored next to this module
    and only downloaded when they are missing. A heat demand profile is
    derived from the air temperature using demandlib, both tables are
    resampled to one-minute means, merged, and gaps are interpolated.

    Returns
    -------
    pandas.DataFrame
        One-minute time series (UTC index). Besides the remaining columns
        of the two CSV files it contains "heat demand (kW)", "cop",
        "PV (kW/kWp)" and "electricity demand (kW)". The intermediate
        columns "PV (W)" and "heat demand (kWh)" are dropped.
    """
    url_temperature = (
        "https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
    )
    url_energy = "https://oemof.org/wp-content/uploads/2025/12/energy.csv"

    print(
        "Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
        " and M. Knoop. Under Creative Commons Attribution 4.0 International"
        " License. It is also available at doi: 10.5281/zenodo.5642902."
        " (We use single family home 26 plus the south-facing PV"
        " from that dataset.)"
    )

    file_path = Path(__file__).parent

    # ---------------- temperature and heat demand ----------------------
    temperature_file = Path(file_path, "temperature.csv")
    _download_if_missing(url_temperature, temperature_file)
    temperature = pd.read_csv(
        temperature_file,
        index_col="Unix Epoch",
    )

    # Length of every time step in hours, computed while the index still
    # holds epoch seconds. The last step has no successor, hence NaN.
    timedelta = np.empty(len(temperature))
    timedelta[:-1] = (temperature.index[1:] - temperature.index[:-1]) / 3600
    timedelta[-1] = np.nan

    temperature.index = pd.to_datetime(
        temperature.index,
        unit="s",
        utc=True,
    )

    building_area = 120  # m² (from publication)
    specific_heat_demand = 60  # kWh/m²/a  (educated guess)
    holidays = dict(Germany().holidays(2019))

    # We estimate the heat demand from the ambient temperature using demandlib.
    # This returns energy per time step in units of kWh.
    temperature["heat demand (kWh)"] = demandlib.bdew.HeatBuilding(
        temperature.index,
        holidays=holidays,
        temperature=temperature["Air Temperature (°C)"],
        shlp_type="EFH",
        building_class=1,
        wind_class=1,
        annual_heat_demand=building_area * specific_heat_demand,
        name="EFH",
    ).get_bdew_profile()

    # Energy per step divided by the step length gives the mean power.
    temperature["heat demand (kW)"] = (
        temperature["heat demand (kWh)"] / timedelta
    )

    # ---------------- electricity and PV -------------------------------
    energy_file = Path(file_path, "energy.csv")
    _download_if_missing(url_energy, energy_file)

    energy = pd.read_csv(
        energy_file,
        index_col=0,
    )
    energy.index = pd.to_datetime(
        energy.index,
        unit="s",
        utc=True,
    )

    # Treat infinite readings as missing so that they get interpolated.
    energy[energy == np.inf] = np.nan
    # TODO: Resample to 1 minute and interpolate NaN values (linearly)
    #  Data in W
    #  demand is absolute
    #  add COP
    #  Mobility time series matching the data.
    #  Mind the timestamps: no offset!

    energy = energy.resample("1 min").mean()

    temperature[temperature == np.inf] = np.nan
    # NOTE(review): the first ten temperature rows are skipped here; the
    # reason is not documented -- confirm before changing.
    temperature = temperature[10:].resample("1 min").mean()

    df = pd.concat([energy, temperature], axis=1)
    df = df.interpolate()

    # ---------------- derived columns ----------------------------------
    df["cop"] = _heat_pump_cop(df["Air Temperature (°C)"])

    df["PV (kW/kWp)"] = df["PV (W)"] / 14.5e3  # Wp from publication

    # Convert W to kW and rename the column to match the new unit.
    df["electricity demand (W)"] /= 1000
    df.rename(
        columns={"electricity demand (W)": "electricity demand (kW)"},
        inplace=True,
    )

    # drop columns that are no longer useful
    df.drop(columns=["PV (W)", "heat demand (kWh)"], inplace=True)

    return df
129
130
131
if __name__ == "__main__":
    # Script entry point: build the merged input table and show it.
    input_data = prepare_input_data()
    print(input_data)
133