Passed
Pull Request — dev (#1052), created by unknown
02:51 (queued 01:09)

calc_load_curves_ind_sites()   Rating: B

Complexity
  Conditions: 2

Size
  Total Lines: 102
  Code Lines: 47

Duplication
  Lines: 0
  Ratio: 0 %

Importance
  Changes: 0
Metric   Value
eloc     47
dl       0
loc      102
rs       8.7345
c        0
b        0
f        0
cc       2
nop      1

How to fix: Long Method

Small methods make your code easier to understand, particularly when combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method; the comment is then a good starting point for naming it.

Commonly applied refactorings include Extract Method, as sketched below.
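As an illustrative sketch only (the helper name calc_share_transpose is hypothetical and not part of the reviewed code), the per-subsector share table built inside calc_load_curves_ind_sites() could be extracted into a small, well-named method:

import pandas as pd


def calc_share_transpose(share_wz_sites):
    """Pivot per-site subsector shares into one column per wz code.

    Expects a DataFrame with columns industrial_sites_id, wz and demand.
    """
    share_transpose = pd.DataFrame(
        index=share_wz_sites.industrial_sites_id.unique(),
        columns=share_wz_sites.wz.unique(),
    )
    share_transpose.index.rename("industrial_sites_id", inplace=True)

    # One column per subsector (wz), holding each site's share in that wz
    for wz in share_transpose.columns:
        share_transpose[wz] = (
            share_wz_sites[share_wz_sites.wz == wz]
            .set_index("industrial_sites_id")
            .demand
        )

    return share_transpose

Inside calc_load_curves_ind_sites() the corresponding block would then collapse to a single call, e.g. share_transpose = calc_share_transpose(share_wz_sites).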

"""The central module containing all code dealing with processing
timeseries data using demandregio

"""

from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import db
from egon.data.datasets.electricity_demand.temporal import calc_load_curve
import egon.data.config

Base = declarative_base()


def identify_voltage_level(df):

    """Identify the voltage_level of a grid component based on its peak load
    and defined thresholds.


    Parameters
    ----------
    df : pandas.DataFrame
        Data frame containing information about peak loads


    Returns
    -------
    pandas.DataFrame
        Data frame with an additional column with voltage level

    """

    df["voltage_level"] = np.nan

    # Identify voltage_level for every demand area taking thresholds into
    # account which were defined in the eGon project
    df.loc[df["peak_load"] <= 0.1, "voltage_level"] = 7
    df.loc[df["peak_load"] > 0.1, "voltage_level"] = 6
    df.loc[df["peak_load"] > 0.2, "voltage_level"] = 5
    df.loc[df["peak_load"] > 5.5, "voltage_level"] = 4
    df.loc[df["peak_load"] > 20, "voltage_level"] = 3
    df.loc[df["peak_load"] > 120, "voltage_level"] = 1

    return df


def identify_bus(load_curves, demand_area):
    """Identify the grid connection point for a consumer by determining its
    grid level based on the time series' peak load and the spatial
    intersection with mv grid districts or ehv voronoi cells.


    Parameters
    ----------
    load_curves : pandas.DataFrame
        Demand timeseries per demand area (e.g. osm landuse area, industrial
        site)

    demand_area : pandas.DataFrame
        DataFrame with id and geometry of areas to which an industrial demand
        is assigned, such as osm landuse areas or industrial sites.

    Returns
    -------
    pandas.DataFrame
        Aggregated industrial demand timeseries per bus

    """

    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select mv griddistrict
    griddistrict = db.select_geodataframe(
        f"""SELECT bus_id, geom FROM
                {sources['egon_mv_grid_district']['schema']}.
                {sources['egon_mv_grid_district']['table']}""",
        geom_col="geom",
        epsg=3035,
    )

    # Initialize dataframe to identify peak load per demand area (e.g. osm
    # landuse area or industrial site)
    peak = pd.DataFrame(columns=["id", "peak_load"])
    peak["id"] = load_curves.max(axis=0).index
    peak["peak_load"] = load_curves.max(axis=0).values

    peak = identify_voltage_level(peak)

    # Assign bus_id to demand area by merging landuse and peak df
    peak = pd.merge(demand_area, peak, right_on="id", left_index=True)

    # Identify all demand areas connected to HVMV buses
    peak_hv = peak[peak["voltage_level"] > 1]

    # Perform a spatial join between the centroid of the demand area and mv
    # grid districts to identify grid connection point
    peak_hv["centroid"] = peak_hv["geom"].centroid
    peak_hv = peak_hv.set_geometry("centroid")
    peak_hv_c = gpd.sjoin(peak_hv, griddistrict, how="inner", op="intersects")

    # Perform a spatial join between the polygon of the demand area and mv
    # grid districts to ensure every area gets assigned to a bus
    peak_hv_p = peak_hv[~peak_hv.isin(peak_hv_c)].dropna().set_geometry("geom")
    peak_hv_p = gpd.sjoin(
        peak_hv_p, griddistrict, how="inner", op="intersects"
    ).drop_duplicates(subset=["id"])

    # Bring both dataframes together
    peak_bus = peak_hv_c.append(peak_hv_p, ignore_index=True)

    # Select ehv voronoi
    ehv_voronoi = db.select_geodataframe(
        f"""SELECT bus_id, geom FROM
                {sources['egon_mv_grid_district']['schema']}.
                {sources['egon_mv_grid_district']['table']}""",
        geom_col="geom",
        epsg=3035,
    )

    # Identify all demand areas connected to EHV buses
    peak_ehv = peak[peak["voltage_level"] == 1]

    # Perform a spatial join between the centroid of the demand area and ehv
    # voronoi to identify grid connection point
    peak_ehv["centroid"] = peak_ehv["geom"].centroid
    peak_ehv = peak_ehv.set_geometry("centroid")
    peak_ehv = gpd.sjoin(peak_ehv, ehv_voronoi, how="inner", op="intersects")

    # Bring both dataframes together
    peak_bus = peak_bus.append(peak_ehv, ignore_index=True)

    # Combine dataframes to bring load curves and bus id together
    curves_da = pd.merge(
        load_curves.T,
        peak_bus[["bus_id", "id", "geom"]],
        left_index=True,
        right_on="id",
    )

    return curves_da


def calc_load_curves_ind_osm(scenario):
    """Temporally disaggregate electrical demand per osm industrial landuse
    area.


    Parameters
    ----------
    scenario : str
        Scenario name.

    Returns
    -------
    pandas.DataFrame
        Demand timeseries of industry allocated to osm landuse areas and
        aggregated per substation id

    """

    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select demands per industrial branch and osm landuse area
    demands_osm_area = db.select_dataframe(
        f"""SELECT osm_id, wz, demand
            FROM {sources['osm']['schema']}.
            {sources['osm']['table']}
            WHERE scenario = '{scenario}'
            AND demand > 0
            """
    ).set_index(["osm_id", "wz"])

    # Select industrial landuse polygons as demand area
    demand_area = db.select_geodataframe(
        f"""SELECT id, geom FROM
                {sources['osm_landuse']['schema']}.
                {sources['osm_landuse']['table']}
                WHERE sector = 3 """,
        index_col="id",
        geom_col="geom",
        epsg=3035,
    )

    # Calculate shares of industrial branches per osm area
    osm_share_wz = demands_osm_area.groupby("osm_id").apply(
        lambda grp: grp / grp.sum()
    )

    osm_share_wz.reset_index(inplace=True)

    share_wz_transpose = pd.DataFrame(
        index=osm_share_wz.osm_id.unique(), columns=osm_share_wz.wz.unique()
    )
    share_wz_transpose.index.rename("osm_id", inplace=True)

    for wz in share_wz_transpose.columns:
        share_wz_transpose[wz] = (
            osm_share_wz[osm_share_wz.wz == wz].set_index("osm_id").demand
        )

    # Rename columns to bring it in line with demandregio data
    share_wz_transpose.rename(columns={1718: 17}, inplace=True)

    # Calculate industrial annual demand per osm area
    annual_demand_osm = demands_osm_area.groupby("osm_id").demand.sum()

    # Return electrical load curves per osm industrial landuse area
    load_curves = calc_load_curve(share_wz_transpose, annual_demand_osm)

    curves_da = identify_bus(load_curves, demand_area)

    # Group all load curves per bus
    curves_bus = (
        curves_da.drop(["id"], axis=1).fillna(0).groupby("bus_id").sum()
    )

    # Initialize pandas.DataFrame for export to database
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])

    # Insert time series data to df as an array
    load_ts_df.p_set = curves_bus.values.tolist()

    # Create DataFrame to store time series individually
    curves_individual_interim = (
        curves_da.drop(["bus_id", "geom"], axis=1).fillna(0)
    ).set_index("id")
    curves_individual = curves_da[["id", "bus_id"]]
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
    curves_individual["scn_name"] = scenario
    curves_individual = curves_individual.rename(
        columns={"id": "osm_id"}
    ).set_index(["osm_id", "scn_name"])

    return load_ts_df, curves_individual


View Code Duplication: this code seems to be duplicated in your project.
def insert_osm_ind_load():
    """Inserts electrical industry loads assigned to osm landuse areas to the
    database.

    Returns
    -------
    None.

    """

    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2021", "eGon2035", "eGon100RE"]:

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['osm_load']['schema']}.{targets['osm_load']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        db.execute_sql(
            f"""
            DELETE FROM
            {targets['osm_load_individual']['schema']}.
            {targets['osm_load_individual']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Calculate industrial load curves per mv substation (hvmv bus)
        data, curves_individual = calc_load_curves_ind_osm(scenario)
        data.index = data.index.rename("bus")
        data["scn_name"] = scenario

        data.set_index(["scn_name"], inplace=True, append=True)

        # Insert into database
        data.to_sql(
            targets["osm_load"]["table"],
            schema=targets["osm_load"]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        curves_individual["peak_load"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).max(axis=1)
        curves_individual["demand"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)

        curves_individual.to_sql(
            targets["osm_load_individual"]["table"],
            schema=targets["osm_load_individual"]["schema"],
            con=db.engine(),
            if_exists="append",
        )


def calc_load_curves_ind_sites(scenario):
    """Temporal disaggregation of load curves per industrial site and
    industrial subsector.


    Parameters
    ----------
    scenario : str
        Scenario name.

    Returns
    -------
    pandas.DataFrame
        Demand timeseries of industry allocated to industrial sites and
        aggregated per substation id and industrial subsector

    """
    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select demands per industrial site including the subsector information
    demands_ind_sites = db.select_dataframe(
        f"""SELECT industrial_sites_id, wz, demand
            FROM {sources['sites']['schema']}.
            {sources['sites']['table']}
            WHERE scenario = '{scenario}'
            AND demand > 0
            """
    ).set_index(["industrial_sites_id"])

    # Select industrial sites as demand_areas from database

    demand_area = db.select_geodataframe(
        f"""SELECT id, geom FROM
                {sources['sites_geom']['schema']}.
                {sources['sites_geom']['table']}""",
        index_col="id",
        geom_col="geom",
        epsg=3035,
    )

    # Replace entries to bring it in line with demandregio's subsector
    # definitions
    demands_ind_sites.replace(1718, 17, inplace=True)
    share_wz_sites = demands_ind_sites.copy()

    # Create additional df on wz_share per industrial site, which is always
    # set to one as the industrial demand per site is subsector specific

    share_wz_sites.demand = 1
    share_wz_sites.reset_index(inplace=True)

    share_transpose = pd.DataFrame(
        index=share_wz_sites.industrial_sites_id.unique(),
        columns=share_wz_sites.wz.unique(),
    )
    share_transpose.index.rename("industrial_sites_id", inplace=True)
    for wz in share_transpose.columns:
        share_transpose[wz] = (
            share_wz_sites[share_wz_sites.wz == wz]
            .set_index("industrial_sites_id")
            .demand
        )

    load_curves = calc_load_curve(share_transpose, demands_ind_sites["demand"])

    curves_da = identify_bus(load_curves, demand_area)

    curves_da = pd.merge(
        curves_da, demands_ind_sites.wz, left_on="id", right_index=True
    )

    # Group all load curves per bus and wz
    curves_bus = (
        curves_da.fillna(0)
        .groupby(["bus_id", "wz"])
        .sum()
        .drop(["id"], axis=1)
    )

    # Initialize pandas.DataFrame for pf table load timeseries
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])

    # Insert data for pf load timeseries table
    load_ts_df.p_set = curves_bus.values.tolist()

    # Create DataFrame to store time series individually
    curves_individual_interim = (
        curves_da.drop(["bus_id", "geom", "wz"], axis=1).fillna(0)
    ).set_index("id")
    curves_individual = curves_da[["id", "bus_id"]]
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
    curves_individual["scn_name"] = scenario
    curves_individual = curves_individual.merge(
        curves_da[["wz", "id"]], left_on="id", right_on="id"
    )
    curves_individual = curves_individual.rename(
        columns={"id": "site_id"}
    ).set_index(["site_id", "scn_name"])

    return load_ts_df, curves_individual


View Code Duplication: this code seems to be duplicated in your project.
def insert_sites_ind_load():
    """Inserts electrical industry loads assigned to industrial sites to the
    database.

    Returns
    -------
    None.

    """

    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2021", "eGon2035", "eGon100RE"]:

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['sites_load']['schema']}.{targets['sites_load']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['sites_load_individual']['schema']}.
            {targets['sites_load_individual']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Calculate industrial load curves per bus
        data, curves_individual = calc_load_curves_ind_sites(scenario)
        data.index = data.index.rename(["bus", "wz"])
        data["scn_name"] = scenario

        data.set_index(["scn_name"], inplace=True, append=True)

        # Insert into database
        data.to_sql(
            targets["sites_load"]["table"],
            schema=targets["sites_load"]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        curves_individual["peak_load"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).max(axis=1)
        curves_individual["demand"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)

        curves_individual.to_sql(
            targets["sites_load_individual"]["table"],
            schema=targets["sites_load_individual"]["schema"],
            con=db.engine(),
            if_exists="append",
        )
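One way to address the duplication flagged between insert_osm_ind_load() and insert_sites_ind_load() would be a shared, parameterised routine. The sketch below is illustrative only; the helper name _insert_ind_load and its parameters are hypothetical and not part of the reviewed code, and it assumes the module-level imports (np, db, egon.data.config) and identify_voltage_level() shown above.

def _insert_ind_load(calc_func, load_key, individual_key, index_names):
    """Hypothetical shared helper: delete and re-insert industrial loads."""
    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2021", "eGon2035", "eGon100RE"]:

        # Delete existing data for this scenario from both target tables
        for key in (load_key, individual_key):
            db.execute_sql(
                f"""
                DELETE FROM {targets[key]['schema']}.{targets[key]['table']}
                WHERE scn_name = '{scenario}'
                """
            )

        # Calculate load curves and insert the aggregated time series
        data, curves_individual = calc_func(scenario)
        data.index = data.index.rename(index_names)
        data["scn_name"] = scenario
        data.set_index(["scn_name"], inplace=True, append=True)
        data.to_sql(
            targets[load_key]["table"],
            schema=targets[load_key]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        # Derive peak load, annual demand and voltage level per area,
        # then insert the individual time series
        p_sets = np.array(curves_individual["p_set"].values.tolist())
        curves_individual["peak_load"] = p_sets.max(axis=1)
        curves_individual["demand"] = p_sets.sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)
        curves_individual.to_sql(
            targets[individual_key]["table"],
            schema=targets[individual_key]["schema"],
            con=db.engine(),
            if_exists="append",
        )

The two public functions would then reduce to calls such as _insert_ind_load(calc_load_curves_ind_osm, "osm_load", "osm_load_individual", "bus") and _insert_ind_load(calc_load_curves_ind_sites, "sites_load", "sites_load_individual", ["bus", "wz"]).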