shared.prepare_input_data() - Code Metrics - Inspection of "Examples timeseries retcon paper" - oemof/oemof-solph - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1226)

by Patrik

created 2026-01-02 18:59 UTC

shared.prepare_input_data() B

↳ Parent: shared

Complexity

Conditions

Size

Total Lines	112
Code Lines	69

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	69
dl	0
loc	112
rs	8.0145
c	0
b	0
f	0
cc	3
nop	0

How to fix Long Method

"""
SPDX-FileCopyrightText: Patrik Schönfeldt
SPDX-FileCopyrightText: DLR e.V.

SPDX-License-Identifier: MIT
"""

from pathlib import Path

import demandlib
import pandas as pd
import numpy as np
from urllib.request import urlretrieve
from workalendar.europe import Germany


def _download_if_missing(url, target):
    """Download *url* to *target* unless *target* already exists.

    The data is written to a temporary sibling file first and only moved to
    *target* once the transfer has finished. This way an interrupted
    download is never mistaken for a complete cached file on the next call.
    """
    if target.exists():
        return
    partial = target.with_name(target.name + ".part")
    try:
        urlretrieve(url, partial)
        partial.replace(target)
    finally:
        # No-op after a successful rename; removes leftovers on failure.
        partial.unlink(missing_ok=True)


def prepare_input_data():
    """Download (if needed) and assemble the example input time series.

    Two CSV files (``temperature.csv`` and ``energy.csv``) are cached next
    to this module. Both are indexed by Unix epoch seconds, converted to
    UTC timestamps, resampled to one-minute means, joined column-wise and
    gap-filled by interpolation.

    Side effects: prints the data license notice and may write the two CSV
    files into the directory of this module.

    Returns
    -------
    pandas.DataFrame
        One-minute time series containing the remaining columns of both
        CSV files and, among others,

        * ``"Air Temperature (°C)"`` - as read from ``temperature.csv``,
        * ``"heat demand (kW)"`` - demandlib profile divided by step length,
        * ``"electricity demand (kW)"`` - ``"electricity demand (W)"``/1000,
        * ``"PV (kW/kWp)"`` - ``"PV (W)"`` divided by 14.5e3,
        * ``"cop"`` - heat pump COP, capped at 7.
    """
    url_temperature = (
        "https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
    )
    url_energy = "https://oemof.org/wp-content/uploads/2025/12/energy.csv"

    print(
        "Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
        " and M. Knoop. Under Creative Commons Attribution 4.0 International"
        " License. It is also available at doi: 10.5281/zenodo.5642902."
        " (We use single family home 26 plus the south-facing PV"
        " from that dataset.)"
    )

    data_dir = Path(__file__).parent

    # ---------------- temperature and heat demand ----------------------
    temperature_file = data_dir / "temperature.csv"
    _download_if_missing(url_temperature, temperature_file)
    temperature = pd.read_csv(
        temperature_file,
        index_col="Unix Epoch",
    )

    # Length of each time step in hours, computed while the index still
    # holds epoch seconds. The last step has no successor, hence NaN.
    step_hours = np.empty(len(temperature))
    step_hours[:-1] = np.diff(temperature.index.to_numpy()) / 3600
    step_hours[-1] = np.nan

    temperature.index = pd.to_datetime(
        temperature.index,
        unit="s",
        utc=True,
    )

    building_area = 120  # m² (from publication)
    specific_heat_demand = 60  # kWh/m²/a  (educated guess)
    holidays = dict(Germany().holidays(2019))

    # We estimate the heat demand from the ambient temperature using demandlib.
    # This returns energy per time step in units of kWh.
    temperature["heat demand (kWh)"] = demandlib.bdew.HeatBuilding(
        temperature.index,
        holidays=holidays,
        temperature=temperature["Air Temperature (°C)"],
        shlp_type="EFH",
        building_class=1,
        wind_class=1,
        annual_heat_demand=building_area * specific_heat_demand,
        name="EFH",
    ).get_bdew_profile()

    # Energy per step (kWh) divided by step length (h) gives mean power (kW).
    temperature["heat demand (kW)"] = (
        temperature["heat demand (kWh)"] / step_hours
    )

    # ---------------- electricity demand and PV -------------------------
    energy_file = data_dir / "energy.csv"
    _download_if_missing(url_energy, energy_file)

    energy = pd.read_csv(
        energy_file,
        index_col=0,
    )
    energy.index = pd.to_datetime(
        energy.index,
        unit="s",
        utc=True,
    )

    # Infinite readings are treated as missing and interpolated below.
    energy[energy == np.inf] = np.nan
    # TODO: Resample to 1 minute and interpolate NaN values (linearly)
    #  Data is in W
    #  demand is absolute
    #  insert COP
    #  mobility time series that matches the data.
    #  Mind the timestamps: no offset!

    energy = energy.resample("1 min").mean()

    temperature[temperature == np.inf] = np.nan
    # NOTE(review): the first ten rows are skipped (positional slice); the
    # reason is not documented here - confirm against the raw data.
    temperature = temperature.iloc[10:].resample("1 min").mean()

    df = pd.concat([energy, temperature], axis=1)
    df = df.interpolate()

    # **************** COP calculation **********************************
    t_supply = 60  # supply temperature in °C
    efficiency = 0.5  # source?
    cop_max = 7  # source???

    # Carnot COP (absolute supply temperature over temperature lift) scaled
    # by the quality grade. The whole Kelvin temperature has to be scaled,
    # not only the 273.15 offset.
    cop_hp = ((t_supply + 273.15) * efficiency) / (
        t_supply - df["Air Temperature (°C)"]
    )
    df["cop"] = cop_hp.clip(upper=cop_max)

    df["PV (kW/kWp)"] = df["PV (W)"] / 14.5e3  # Wp from publication

    df["electricity demand (W)"] /= 1000
    df.rename(
        columns={"electricity demand (W)": "electricity demand (kW)"},
        inplace=True,
    )

    # drop columns that are no longer useful
    df.drop(columns=["PV (W)", "heat demand (kWh)"], inplace=True)

    return df


if __name__ == "__main__":
    # Script entry point: build the input table and show it on stdout.
    input_data = prepare_input_data()
    print(input_data)


1			"""
2			SPDX-FileCopyrightText: Patrik Schönfeldt
3			SPDX-FileCopyrightText: DLR e.V.
4
5			SPDX-License-Identifier: MIT
6			"""
7
8			from pathlib import Path
9
10			import demandlib
11			import pandas as pd
12			import numpy as np
13			from urllib.request import urlretrieve
14			from workalendar.europe import Germany
15
16
17			def prepare_input_data():
18			url_temperature = (
19			"https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
20			)
21			url_energy = "https://oemof.org/wp-content/uploads/2025/12/energy.csv"
22
23			print(
24			"Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
25			" and M. Knoop. Under Creative Commons Attribution 4.0 International"
26			" License. It is also available at doi: 10.5281/zenodo.5642902."
27			" (We use single family home 26 plus the south-facing PV"
28			" from that dataset.)"
29			)
30
31			file_path = Path(__file__).parent
32
33			temperature_file = Path(file_path, "temperature.csv")
34			if not temperature_file.exists():
35			urlretrieve(url_temperature, temperature_file)
36			temperature = pd.read_csv(
37			temperature_file,
38			index_col="Unix Epoch",
39			)
40			timedelta = np.empty(len(temperature))
41			timedelta[:-1] = (temperature.index[1:] - temperature.index[:-1]) / 3600
42			timedelta[-1] = np.nan
43
44			temperature.index = pd.to_datetime(
45			temperature.index,
46			unit="s",
47			utc=True,
48			)
49
50			building_area = 120 # m² (from publication)
51			specific_heat_demand = 60 # kWh/m²/a (educated guess)
52			holidays = dict(Germany().holidays(2019))
53
54			# We estimate the heat demand from the ambient temperature using demandlib.
55			# This returns energy per time step in units of kWh.
56			temperature["heat demand (kWh)"] = demandlib.bdew.HeatBuilding(
57			temperature.index,
58			holidays=holidays,
59			temperature=temperature["Air Temperature (°C)"],
60			shlp_type="EFH",
61			building_class=1,
62			wind_class=1,
63			annual_heat_demand=building_area * specific_heat_demand,
64			name="EFH",
65			).get_bdew_profile()
66
67			temperature["heat demand (kW)"] = (
68			temperature["heat demand (kWh)"] / timedelta
69			)
70
71			energy_file = Path(file_path, "energy.csv")
72			if not energy_file.exists():
73			urlretrieve(url_energy, energy_file)
74
75			energy = pd.read_csv(
76			energy_file,
77			index_col=0,
78			)
79			energy.index = pd.to_datetime(
80			energy.index,
81			unit="s",
82			utc=True,
83			)
84
85			energy[energy == np.inf] = np.nan
86			# ToDo: Auf 1 Minuten samplen und Nan-Werte interpolieren (linear)
87			# Daten in W
88			# demand ist absolut
89			# COP einfügen
90			# Mobilitätszeitreihe, die zu den Daten passt.
91			# Zeitstempel beachten ohne Offset!
92
93			energy = (
94			energy.resample("1 min")
95			.mean()
96			)
97			temperature[temperature == np.inf] = np.nan
98			temperature = (
99			temperature[10:].resample("1 min")
100			.mean()
101			)
102			df = pd.concat([energy, temperature], axis=1)
103			df = df.interpolate()
104
105			# ************** COP calculation ********************************
106			t_supply = 60
107			efficiency = 0.5 # source?
108			cop_max = 7 # source???
109
110			cop_hp = (t_supply + 273.15 * efficiency) / (
111			t_supply - df["Air Temperature (°C)"]
112			)
113			cop_hp.loc[cop_hp > cop_max] = cop_max
114
115			df["cop"] = cop_hp
116
117			df["PV (kW/kWp)"] = df["PV (W)"] / 14.5e3 # Wp from publication
118
119			df["electricity demand (W)"] /= 1000
120			df.rename(
121			columns={"electricity demand (W)": "electricity demand (kW)"},
122			inplace=True,
123			)
124
125			# drop columns that are no longer useful
126			df.drop(columns=["PV (W)", "heat demand (kWh)"], inplace=True)
127
128			return df
129
130
131			if __name__ == "__main__":
132			print(prepare_input_data())
133

oemof / oemof-solph

Pull Request — dev (#1226)

shared.prepare_input_data() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like