shared.prepare_input_data() - Code Metrics - Inspection of "Examples timeseries retcon paper" - oemof/oemof-solph - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1226)

by Patrik

created 2026-01-05 08:39 UTC

shared.prepare_input_data() B

↳ Parent: shared

Complexity

Conditions

Size

Total Lines	105
Code Lines	66

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	66
dl	0
loc	105
rs	8.1127
c	0
b	0
f	0
cc	3
nop	0

How to fix Long Method

"""
SPDX-FileCopyrightText: Patrik Schönfeldt
SPDX-FileCopyrightText: DLR e.V.

SPDX-License-Identifier: MIT
"""

import datetime
from pathlib import Path

import demandlib
import pandas as pd
import numpy as np
from urllib.request import urlretrieve
from workalendar.europe import Germany


def prepare_input_data():
    # ToDo: Mobilitätszeitreihe, die zu den Daten passt.

    url_temperature = (
        "https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
    )
    url_energy = "https://oemof.org/wp-content/uploads/2026/01/energy.csv"

    print(
        "Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
        " and M. Knoop. Under Creative Commons Attribution 4.0 International"
        " License. It is also available at doi: 10.5281/zenodo.5642902."
        " (We use building 27 plus the south-facing PV"
        " from that dataset.)"
    )

    file_path = Path(__file__).parent

    def _temperature_dataframe():
        temperature_file = Path(file_path, "temperature.csv")
        if not temperature_file.exists():
            urlretrieve(url_temperature, temperature_file)
        temperature = pd.read_csv(
            temperature_file,
            index_col="Unix Epoch",
        )

        temperature.index = pd.to_datetime(
            temperature.index,
            unit="s",
            utc=True,
        )

        temperature[temperature == np.inf] = np.nan
        temperature = temperature[10:].resample("1 min").mean()
        return temperature

    def _energy_dataframe():
        energy_file = Path(file_path, "energy.csv")
        if not energy_file.exists():
            urlretrieve(url_energy, energy_file)

        energy = pd.read_csv(
            energy_file,
            index_col=0,
        )
        energy.index = pd.to_datetime(
            energy.index,
            unit="s",
            utc=True,
        )

        energy[energy == np.inf] = np.nan

        energy = (
            energy.resample("1 min")
            .mean()
        )
        return energy

    df = pd.concat([_energy_dataframe(), _temperature_dataframe()], axis=1)

    df = df.interpolate()

    building_area = 110  # m² (from publication)
    specific_heat_demand = 60  #  kWh/m²/a  (educated guess)
    holidays = dict(Germany().holidays(2019))

    # We estimate the heat demand from the ambient temperature using demandlib.
    # This returns energy per time step in units of kWh, but we want kW.
    df["heat demand (kW)"] = demandlib.bdew.HeatBuilding(
        df.index,
        holidays=holidays,
        temperature=df["Air Temperature (°C)"],
        shlp_type="EFH",
        building_class=1,
        wind_class=1,
        annual_heat_demand=building_area * specific_heat_demand,
        name="EFH",
    ).get_bdew_profile() * 60

    # **************** COP calculation **********************************
    t_supply = 60
    efficiency = 0.5  # source?
    cop_max = 7  # source???

    cop_hp = (t_supply + 273.15 * efficiency) / (
        t_supply - df["Air Temperature (°C)"]
    )
    cop_hp.loc[cop_hp > cop_max] = cop_max

    df["cop"] = cop_hp

    df["PV (kW/kWp)"] = df["P_PV (W)"] / 14.5e3  # Wp from publication

    df["P_tot27 (W)"] /= 1000
    df.rename(
        columns={"P_tot27 (W)": "electricity demand (kW)"},
        inplace=True,
    )

    # drop colums that are no longer useful
    df.drop(columns=["P_PV (W)"], inplace=True)

    return df


if __name__ == "__main__":
    import matplotlib.pyplot as plt

    df =prepare_input_data()

    # plt.plot(df["electricity demand (kW)"], "k")

    p_pv = {}
    resolutions = [
        "1 min",
    #    "5 min",
    #    "10 min",
        "15 min",
    #    "30 min",
        "1 h",
    #    "2 h",
        "3 h",
    #    "6 h",
    ]

    fig0, ax0 = plt.subplots(figsize=(4, 2), tight_layout=True)
    fig1, ax1 = plt.subplots(figsize=(4, 2), tight_layout=True)

    for resolution in resolutions[::-1]:
        time_series = 15.4 * df["PV (kW/kWp)"].resample(resolution).mean()
        # plt.plot(
        #    np.linspace(0, 8760, len(p_pv[resolution])),
        #    sorted(p_pv[resolution])[::-1],
        #    label=resolution,
        # )

        time_series = time_series[
            datetime.datetime(2019, 11, 3, 0, tzinfo=datetime.timezone.utc)
            : datetime.datetime(2019, 11, 4, 0, tzinfo=datetime.timezone.utc)
        ]
        hour_axis = np.linspace(0, 24, num=len(time_series))
        ax0.step(
            x=hour_axis,
            y=time_series,
            label=resolution,# + f" ({len(time_series)} steps)",
            where="post",
        )
        ax1.step(
            x=hour_axis,
            y=sorted(time_series)[::-1],
            label=resolution,# + f" ({len(time_series)} steps)",
            where="post",
        )

        p_pv[resolution] = time_series

    ax0.set_xlim(5.9, 18.1)
    ax0.set_xlabel("Time (UTC)")
    ax0.set_ylabel("Power (kW)")
    ax0.legend()
    ax0.grid()
    fig0.savefig("2019-11-3_PV-timeseries.eps")
    fig0.savefig("2019-11-3_PV-timeseries.pdf")

    ax1.grid()
    ax1.set_xlim(-0.1, 12.1)
    ax1.set_xlabel("Duration (h)")
    ax1.set_ylabel("Power (kW)")
    ax1.set_yscale("log")
    #ax1.legend()
    fig1.savefig("2019-11-3_PV-duration.eps")
    fig1.savefig("2019-11-3_PV-duration.pdf")

    plt.show()


1			"""
2			SPDX-FileCopyrightText: Patrik Schönfeldt
3			SPDX-FileCopyrightText: DLR e.V.
4
5			SPDX-License-Identifier: MIT
6			"""
7
8			import datetime
9			from pathlib import Path
10
11			import demandlib
12			import pandas as pd
13			import numpy as np
14			from urllib.request import urlretrieve
15			from workalendar.europe import Germany
16
17
18			def prepare_input_data():
19			# ToDo: Mobilitätszeitreihe, die zu den Daten passt.
20
21			url_temperature = (
22			"https://oemof.org/wp-content/uploads/2025/12/temperature.csv"
23			)
24			url_energy = "https://oemof.org/wp-content/uploads/2026/01/energy.csv"
25
26			print(
27			"Data is licensed from M. Schlemminger, T. Ohrdes, E. Schneider,"
28			" and M. Knoop. Under Creative Commons Attribution 4.0 International"
29			" License. It is also available at doi: 10.5281/zenodo.5642902."
30			" (We use building 27 plus the south-facing PV"
31			" from that dataset.)"
32			)
33
34			file_path = Path(__file__).parent
35
36			def _temperature_dataframe():
37			temperature_file = Path(file_path, "temperature.csv")
38			if not temperature_file.exists():
39			urlretrieve(url_temperature, temperature_file)
40			temperature = pd.read_csv(
41			temperature_file,
42			index_col="Unix Epoch",
43			)
44
45			temperature.index = pd.to_datetime(
46			temperature.index,
47			unit="s",
48			utc=True,
49			)
50
51			temperature[temperature == np.inf] = np.nan
52			temperature = temperature[10:].resample("1 min").mean()
53			return temperature
54
55			def _energy_dataframe():
56			energy_file = Path(file_path, "energy.csv")
57			if not energy_file.exists():
58			urlretrieve(url_energy, energy_file)
59
60			energy = pd.read_csv(
61			energy_file,
62			index_col=0,
63			)
64			energy.index = pd.to_datetime(
65			energy.index,
66			unit="s",
67			utc=True,
68			)
69
70			energy[energy == np.inf] = np.nan
71
72			energy = (
73			energy.resample("1 min")
74			.mean()
75			)
76			return energy
77
78			df = pd.concat([_energy_dataframe(), _temperature_dataframe()], axis=1)
79
80			df = df.interpolate()
81
82			building_area = 110 # m² (from publication)
83			specific_heat_demand = 60 # kWh/m²/a (educated guess)
84			holidays = dict(Germany().holidays(2019))
85
86			# We estimate the heat demand from the ambient temperature using demandlib.
87			# This returns energy per time step in units of kWh, but we want kW.
88			df["heat demand (kW)"] = demandlib.bdew.HeatBuilding(
89			df.index,
90			holidays=holidays,
91			temperature=df["Air Temperature (°C)"],
92			shlp_type="EFH",
93			building_class=1,
94			wind_class=1,
95			annual_heat_demand=building_area * specific_heat_demand,
96			name="EFH",
97			).get_bdew_profile() * 60
98
99			# ************** COP calculation ********************************
100			t_supply = 60
101			efficiency = 0.5 # source?
102			cop_max = 7 # source???
103
104			cop_hp = (t_supply + 273.15 * efficiency) / (
105			t_supply - df["Air Temperature (°C)"]
106			)
107			cop_hp.loc[cop_hp > cop_max] = cop_max
108
109			df["cop"] = cop_hp
110
111			df["PV (kW/kWp)"] = df["P_PV (W)"] / 14.5e3 # Wp from publication
112
113			df["P_tot27 (W)"] /= 1000
114			df.rename(
115			columns={"P_tot27 (W)": "electricity demand (kW)"},
116			inplace=True,
117			)
118
119			# drop colums that are no longer useful
120			df.drop(columns=["P_PV (W)"], inplace=True)
121
122			return df
123
124
125			if __name__ == "__main__":
126			import matplotlib.pyplot as plt
127
128			df =prepare_input_data()
129
130			# plt.plot(df["electricity demand (kW)"], "k")
131
132			p_pv = {}
133			resolutions = [
134			"1 min",
135			# "5 min",
136			# "10 min",
137			"15 min",
138			# "30 min",
139			"1 h",
140			# "2 h",
141			"3 h",
142			# "6 h",
143			]
144
145			fig0, ax0 = plt.subplots(figsize=(4, 2), tight_layout=True)
146			fig1, ax1 = plt.subplots(figsize=(4, 2), tight_layout=True)
147
148			for resolution in resolutions[::-1]:
149			time_series = 15.4 * df["PV (kW/kWp)"].resample(resolution).mean()
150			# plt.plot(
151			# np.linspace(0, 8760, len(p_pv[resolution])),
152			# sorted(p_pv[resolution])[::-1],
153			# label=resolution,
154			# )
155
156			time_series = time_series[
157			datetime.datetime(2019, 11, 3, 0, tzinfo=datetime.timezone.utc)
158			: datetime.datetime(2019, 11, 4, 0, tzinfo=datetime.timezone.utc)
159			]
160			hour_axis = np.linspace(0, 24, num=len(time_series))
161			ax0.step(
162			x=hour_axis,
163			y=time_series,
164			label=resolution,# + f" ({len(time_series)} steps)",
165			where="post",
166			)
167			ax1.step(
168			x=hour_axis,
169			y=sorted(time_series)[::-1],
170			label=resolution,# + f" ({len(time_series)} steps)",
171			where="post",
172			)
173
174			p_pv[resolution] = time_series
175
176			ax0.set_xlim(5.9, 18.1)
177			ax0.set_xlabel("Time (UTC)")
178			ax0.set_ylabel("Power (kW)")
179			ax0.legend()
180			ax0.grid()
181			fig0.savefig("2019-11-3_PV-timeseries.eps")
182			fig0.savefig("2019-11-3_PV-timeseries.pdf")
183
184			ax1.grid()
185			ax1.set_xlim(-0.1, 12.1)
186			ax1.set_xlabel("Duration (h)")
187			ax1.set_ylabel("Power (kW)")
188			ax1.set_yscale("log")
189			#ax1.legend()
190			fig1.savefig("2019-11-3_PV-duration.eps")
191			fig1.savefig("2019-11-3_PV-duration.pdf")
192
193			plt.show()
194

oemof / oemof-solph

Pull Request — dev (#1226)

shared.prepare_input_data() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like