data.datasets.industry.temporal.calc_load_curves_ind_sites() - Code Metrics - Inspection of "Features/#1014 load areas" - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1020)

unknown

created 2022-11-09 08:00 UTC

calc_load_curves_ind_sites() B

↳ Parent: data.datasets.industry.temporal

Complexity

Conditions

Size

Total Lines	95
Code Lines	43

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	43
dl	0
loc	95
rs	8.8478
c	0
b	0
f	0
cc	2
nop	1

How to fix Long Method

"""The central module containing all code dealing with processing
timeseries data using demandregio

"""

import egon.data.config
import geopandas as gpd
import pandas as pd
import numpy as np
from egon.data import db
from egon.data.datasets.electricity_demand.temporal import calc_load_curve
from sqlalchemy import ARRAY, Column, Float, Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


def identify_voltage_level(df):

    """Identify the voltage_level of a grid component based on its peak load and
    defined thresholds.


    Parameters
    ----------
    df : pandas.DataFrame
        Data frame containing information about peak loads


    Returns
    -------
    pandas.DataFrame
        Data frame with an additional column with voltage level

    """

    df['voltage_level']= np.nan

    # Identify voltage_level for every demand area taking thresholds into account which were defined in the eGon project
    df.loc[df["peak_load"] <= 0.1, "voltage_level"] = 7
    df.loc[df["peak_load"] > 0.1, "voltage_level"] = 6
    df.loc[df["peak_load"] > 0.2, "voltage_level"] = 5
    df.loc[df["peak_load"] > 5.5, "voltage_level"] = 4
    df.loc[df["peak_load"] > 20, "voltage_level"] = 3
    df.loc[df["peak_load"] > 120, "voltage_level"] = 1

    return df


def identify_bus(load_curves, demand_area):
    """Identify the grid connection point for a consumer by determining its grid level
    based on the time series' peak load and the spatial intersection to mv
    grid districts or ehv voronoi cells.


    Parameters
    ----------
    load_curves : pandas.DataFrame
        Demand timeseries per demand area (e.g. osm landuse area, industrial site)

    demand_area: pandas.DataFrame
        Dataframe with id and geometry of areas where an industrial demand
        is assigned to, such as osm landuse areas or industrial sites.

    Returns
    -------
    pandas.DataFrame
        Aggregated industrial demand timeseries per bus

    """

    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select mv griddistrict
    griddistrict = db.select_geodataframe(
        f"""SELECT bus_id, geom FROM
                {sources['egon_mv_grid_district']['schema']}.
                {sources['egon_mv_grid_district']['table']}""",
        geom_col="geom",
        epsg=3035,
    )

    # Initialize dataframe to identify peak load per demand area (e.g. osm landuse area or industrial site)
    peak = pd.DataFrame(columns=["id", "peak_load"])
    peak["id"] = load_curves.max(axis=0).index
    peak["peak_load"] = load_curves.max(axis=0).values

    peak = identify_voltage_level(peak)

    # Assign bus_id to demand area by merging landuse and peak df
    peak = pd.merge(demand_area, peak, right_on="id", left_index=True)

    # Identify all demand areas connected to HVMV buses
    peak_hv = peak[peak["voltage_level"] > 1]

    # Perform a spatial join between the centroid of the demand area and mv grid districts to identify grid connection point
    peak_hv["centroid"] = peak_hv["geom"].centroid
    peak_hv = peak_hv.set_geometry("centroid")
    peak_hv_c = gpd.sjoin(peak_hv, griddistrict, how="inner", op="intersects")

    # Perform a spatial join between the polygon of the demand area  and mv grid districts to ensure every area got assign to a bus
    peak_hv_p = peak_hv[~peak_hv.isin(peak_hv_c)].dropna().set_geometry("geom")
    peak_hv_p = gpd.sjoin(
        peak_hv_p, griddistrict, how="inner", op="intersects"
    ).drop_duplicates(subset=["id"])

    # Bring both dataframes together
    peak_bus = peak_hv_c.append(peak_hv_p, ignore_index=True)

    # Combine dataframes to bring loadcurves and bus id together
    curves_da = pd.merge(
        load_curves.T,
        peak_bus[["bus_id", "id", "geom"]],
        left_index=True,
        right_on="id",
    )

    return curves_da


def calc_load_curves_ind_osm(scenario):
    """Temporal disaggregate electrical demand per osm industrial landuse area.


    Parameters
    ----------
    scenario : str
        Scenario name.

    Returns
    -------
    pandas.DataFrame
        Demand timeseries of industry allocated to osm landuse areas and aggregated
        per substation id

    """

    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select demands per industrial branch and osm landuse area
    demands_osm_area = db.select_dataframe(
        f"""SELECT osm_id, wz, demand
            FROM {sources['osm']['schema']}.
            {sources['osm']['table']}
            WHERE scenario = '{scenario}'
            AND demand > 0
            """
    ).set_index(["osm_id", "wz"])

    # Select industrial landuse polygons as demand area
    demand_area = db.select_geodataframe(
        f"""SELECT id, geom FROM
                {sources['osm_landuse']['schema']}.
                {sources['osm_landuse']['table']}
                WHERE sector = 3 """,
        index_col="id",
        geom_col="geom",
        epsg=3035,
    )

    # Calculate shares of industrial branches per osm area
    osm_share_wz = demands_osm_area.groupby("osm_id").apply(
        lambda grp: grp / grp.sum()
    )

    osm_share_wz.reset_index(inplace=True)

    share_wz_transpose = pd.DataFrame(
        index=osm_share_wz.osm_id.unique(), columns=osm_share_wz.wz.unique()
    )
    share_wz_transpose.index.rename("osm_id", inplace=True)

    for wz in share_wz_transpose.columns:
        share_wz_transpose[wz] = (
            osm_share_wz[osm_share_wz.wz == wz].set_index("osm_id").demand
        )

    # Rename columns to bring it in line with demandregio data
    share_wz_transpose.rename(columns={1718: 17}, inplace=True)

    # Calculate industrial annual demand per osm area
    annual_demand_osm = demands_osm_area.groupby("osm_id").demand.sum()

    # Return electrical load curves per osm industrial landuse area
    load_curves = calc_load_curve(share_wz_transpose, annual_demand_osm)

    curves_da = identify_bus(load_curves, demand_area)

    # Group all load curves per bus
    curves_bus = (
        curves_da.drop(["id"], axis=1).fillna(0).groupby("bus_id").sum()
    )

    # Initalize pandas.DataFrame for export to database
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])

    # Insert time series data to df as an array
    load_ts_df.p_set = curves_bus.values.tolist()

    # Create Dataframe to store time series individually
    curves_individual_interim = (
        curves_da.drop(["bus_id", "geom"], axis=1).fillna(0)
    ).set_index("id")
    curves_individual = curves_da[["id", "bus_id"]]
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
    curves_individual["scn_name"]= scenario
    curves_individual = curves_individual.rename(columns={"id": "osm_id"}).set_index(['osm_id', 'scn_name'])

    return load_ts_df, curves_individual


def insert_osm_ind_load():

    """Inserts electrical industry loads assigned to osm landuse areas to the database

    Returns
    -------
    None.

    """

    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2035", "eGon100RE"]:

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['osm_load']['schema']}.{targets['osm_load']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        db.execute_sql(
            f"""
            DELETE FROM
            {targets['osm_load_individual']['schema']}.{targets['osm_load_individual']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Calculate cts load curves per mv substation (hvmv bus)
        data, curves_individual = calc_load_curves_ind_osm(scenario)
        data.index = data.index.rename("bus")
        data["scn_name"] = scenario

        data.set_index(["scn_name"], inplace=True, append=True)

        # Insert into database
        data.to_sql(
            targets["osm_load"]["table"],
            schema=targets["osm_load"]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
        curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)

        curves_individual.to_sql(
            targets["osm_load_individual"]["table"],
            schema=targets["osm_load_individual"]["schema"],
            con=db.engine(),
            if_exists="append",
        )




def calc_load_curves_ind_sites(scenario):
    """Temporal disaggregation of load curves per industrial site and industrial subsector.


    Parameters
    ----------
    scenario : str
        Scenario name.

    Returns
    -------
    pandas.DataFrame
        Demand timeseries of industry allocated to industrial sites and aggregated
        per substation id and industrial subsector

    """
    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select demands per industrial site including the subsector information
    demands_ind_sites = db.select_dataframe(
        f"""SELECT industrial_sites_id, wz, demand
            FROM {sources['sites']['schema']}.
            {sources['sites']['table']}
            WHERE scenario = '{scenario}'
            AND demand > 0
            """
    ).set_index(["industrial_sites_id"])

    # Select industrial sites as demand_areas from database

    demand_area = db.select_geodataframe(
        f"""SELECT id, geom FROM
                {sources['sites_geom']['schema']}.
                {sources['sites_geom']['table']}""",
        index_col="id",
        geom_col="geom",
        epsg=3035,
    )

    # Replace entries to bring it in line with demandregio's subsector definitions
    demands_ind_sites.replace(1718, 17, inplace=True)
    share_wz_sites = demands_ind_sites.copy()

    # Create additional df on wz_share per industrial site, which is always set to one
    # as the industrial demand per site is subsector specific

    share_wz_sites.demand = 1
    share_wz_sites.reset_index(inplace=True)

    share_transpose = pd.DataFrame(
        index=share_wz_sites.industrial_sites_id.unique(),
        columns=share_wz_sites.wz.unique(),
    )
    share_transpose.index.rename("industrial_sites_id", inplace=True)
    for wz in share_transpose.columns:
        share_transpose[wz] = (
            share_wz_sites[share_wz_sites.wz == wz]
            .set_index("industrial_sites_id")
            .demand
        )

    load_curves = calc_load_curve(share_transpose, demands_ind_sites["demand"])

    curves_da = identify_bus(load_curves, demand_area)

    curves_da = pd.merge(
        curves_da, demands_ind_sites.wz, left_on="id", right_index=True
    )

    # Group all load curves per bus and wz
    curves_bus = (
        curves_da.fillna(0)
        .groupby(["bus_id", "wz"])
        .sum()
        .drop(["id"], axis=1)
    )

    # Initalize pandas.DataFrame for pf table load timeseries
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])

    # Insert data for pf load timeseries table
    load_ts_df.p_set = curves_bus.values.tolist()

    # Create Dataframe to store time series individually
    curves_individual_interim = (
        curves_da.drop(["bus_id", "geom", "wz"], axis=1).fillna(0)
    ).set_index("id")
    curves_individual = curves_da[["id", "bus_id"]]
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
    curves_individual["scn_name"]= scenario
    curves_individual = curves_individual.rename(columns={"id": "site_id"}).set_index(['site_id', 'scn_name'])

    return load_ts_df, curves_individual


def insert_sites_ind_load():

    """Inserts electrical industry loads assigned to osm landuse areas to the database

    Returns
    -------
    None.

    """

    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2035", "eGon100RE"]:

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['sites_load']['schema']}.{targets['sites_load']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['sites_load_individual']['schema']}.
            {targets['sites_load_individual']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Calculate industrial load curves per bus
        data, curves_individual = calc_load_curves_ind_sites(scenario)
        data.index = data.index.rename(["bus", "wz"])
        data["scn_name"] = scenario

        data.set_index(["scn_name"], inplace=True, append=True)

        # Insert into database
        data.to_sql(
            targets["sites_load"]["table"],
            schema=targets["sites_load"]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
        curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)

        curves_individual.to_sql(
            targets["sites_load_individual"]["table"],
            schema=targets["sites_load_individual"]["schema"],
            con=db.engine(),
            if_exists="append",
        )


1		"""The central module containing all code dealing with processing
2		timeseries data using demandregio
3
4		"""
5
6		import egon.data.config
7		import geopandas as gpd
8		import pandas as pd
9		import numpy as np
10		from egon.data import db
11		from egon.data.datasets.electricity_demand.temporal import calc_load_curve
12		from sqlalchemy import ARRAY, Column, Float, Integer, String
13		from sqlalchemy.ext.declarative import declarative_base
14
15		Base = declarative_base()
16
17
18		def identify_voltage_level(df):
19
20		"""Identify the voltage_level of a grid component based on its peak load and
21		defined thresholds.
22
23
24		Parameters
25		----------
26		df : pandas.DataFrame
27		Data frame containing information about peak loads
28
29
30		Returns
31		-------
32		pandas.DataFrame
33		Data frame with an additional column with voltage level
34
35		"""
36
37		df['voltage_level']= np.nan
38
39		# Identify voltage_level for every demand area taking thresholds into account which were defined in the eGon project
40		df.loc[df["peak_load"] <= 0.1, "voltage_level"] = 7
41		df.loc[df["peak_load"] > 0.1, "voltage_level"] = 6
42		df.loc[df["peak_load"] > 0.2, "voltage_level"] = 5
43		df.loc[df["peak_load"] > 5.5, "voltage_level"] = 4
44		df.loc[df["peak_load"] > 20, "voltage_level"] = 3
45		df.loc[df["peak_load"] > 120, "voltage_level"] = 1
46
47		return df
48
49
50		def identify_bus(load_curves, demand_area):
51		"""Identify the grid connection point for a consumer by determining its grid level
52		based on the time series' peak load and the spatial intersection to mv
53		grid districts or ehv voronoi cells.
54
55
56		Parameters
57		----------
58		load_curves : pandas.DataFrame
59		Demand timeseries per demand area (e.g. osm landuse area, industrial site)
60
61		demand_area: pandas.DataFrame
62		Dataframe with id and geometry of areas where an industrial demand
63		is assigned to, such as osm landuse areas or industrial sites.
64
65		Returns
66		-------
67		pandas.DataFrame
68		Aggregated industrial demand timeseries per bus
69
70		"""
71
72		sources = egon.data.config.datasets()["electrical_load_curves_industry"][
73		"sources"
74		]
75
76		# Select mv griddistrict
77		griddistrict = db.select_geodataframe(
78		f"""SELECT bus_id, geom FROM
79		{sources['egon_mv_grid_district']['schema']}.
80		{sources['egon_mv_grid_district']['table']}""",
81		geom_col="geom",
82		epsg=3035,
83		)
84
85		# Initialize dataframe to identify peak load per demand area (e.g. osm landuse area or industrial site)
86		peak = pd.DataFrame(columns=["id", "peak_load"])
87		peak["id"] = load_curves.max(axis=0).index
88		peak["peak_load"] = load_curves.max(axis=0).values
89
90		peak = identify_voltage_level(peak)
91
92		# Assign bus_id to demand area by merging landuse and peak df
93		peak = pd.merge(demand_area, peak, right_on="id", left_index=True)
94
95		# Identify all demand areas connected to HVMV buses
96		peak_hv = peak[peak["voltage_level"] > 1]
97
98		# Perform a spatial join between the centroid of the demand area and mv grid districts to identify grid connection point
99		peak_hv["centroid"] = peak_hv["geom"].centroid
100		peak_hv = peak_hv.set_geometry("centroid")
101		peak_hv_c = gpd.sjoin(peak_hv, griddistrict, how="inner", op="intersects")
102
103		# Perform a spatial join between the polygon of the demand area and mv grid districts to ensure every area got assign to a bus
104		peak_hv_p = peak_hv[~peak_hv.isin(peak_hv_c)].dropna().set_geometry("geom")
105		peak_hv_p = gpd.sjoin(
106		peak_hv_p, griddistrict, how="inner", op="intersects"
107		).drop_duplicates(subset=["id"])
108
109		# Bring both dataframes together
110		peak_bus = peak_hv_c.append(peak_hv_p, ignore_index=True)
111
112		# Combine dataframes to bring loadcurves and bus id together
113		curves_da = pd.merge(
114		load_curves.T,
115		peak_bus[["bus_id", "id", "geom"]],
116		left_index=True,
117		right_on="id",
118		)
119
120		return curves_da
121
122
123		def calc_load_curves_ind_osm(scenario):
124		"""Temporal disaggregate electrical demand per osm industrial landuse area.
125
126
127		Parameters
128		----------
129		scenario : str
130		Scenario name.
131
132		Returns
133		-------
134		pandas.DataFrame
135		Demand timeseries of industry allocated to osm landuse areas and aggregated
136		per substation id
137
138		"""
139
140		sources = egon.data.config.datasets()["electrical_load_curves_industry"][
141		"sources"
142		]
143
144		# Select demands per industrial branch and osm landuse area
145		demands_osm_area = db.select_dataframe(
146		f"""SELECT osm_id, wz, demand
147		FROM {sources['osm']['schema']}.
148		{sources['osm']['table']}
149		WHERE scenario = '{scenario}'
150		AND demand > 0
151		"""
152		).set_index(["osm_id", "wz"])
153
154		# Select industrial landuse polygons as demand area
155		demand_area = db.select_geodataframe(
156		f"""SELECT id, geom FROM
157		{sources['osm_landuse']['schema']}.
158		{sources['osm_landuse']['table']}
159		WHERE sector = 3 """,
160		index_col="id",
161		geom_col="geom",
162		epsg=3035,
163		)
164
165		# Calculate shares of industrial branches per osm area
166		osm_share_wz = demands_osm_area.groupby("osm_id").apply(
167		lambda grp: grp / grp.sum()
168		)
169
170		osm_share_wz.reset_index(inplace=True)
171
172		share_wz_transpose = pd.DataFrame(
173		index=osm_share_wz.osm_id.unique(), columns=osm_share_wz.wz.unique()
174		)
175		share_wz_transpose.index.rename("osm_id", inplace=True)
176
177		for wz in share_wz_transpose.columns:
178		share_wz_transpose[wz] = (
179		osm_share_wz[osm_share_wz.wz == wz].set_index("osm_id").demand
180		)
181
182		# Rename columns to bring it in line with demandregio data
183		share_wz_transpose.rename(columns={1718: 17}, inplace=True)
184
185		# Calculate industrial annual demand per osm area
186		annual_demand_osm = demands_osm_area.groupby("osm_id").demand.sum()
187
188		# Return electrical load curves per osm industrial landuse area
189		load_curves = calc_load_curve(share_wz_transpose, annual_demand_osm)
190
191		curves_da = identify_bus(load_curves, demand_area)
192
193		# Group all load curves per bus
194		curves_bus = (
195		curves_da.drop(["id"], axis=1).fillna(0).groupby("bus_id").sum()
196		)
197
198		# Initalize pandas.DataFrame for export to database
199		load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])
200
201		# Insert time series data to df as an array
202		load_ts_df.p_set = curves_bus.values.tolist()
203
204		# Create Dataframe to store time series individually
205		curves_individual_interim = (
206		curves_da.drop(["bus_id", "geom"], axis=1).fillna(0)
207		).set_index("id")
208		curves_individual = curves_da[["id", "bus_id"]]
209		curves_individual["p_set"] = curves_individual_interim.values.tolist()
210		curves_individual["scn_name"]= scenario
211		curves_individual = curves_individual.rename(columns={"id": "osm_id"}).set_index(['osm_id', 'scn_name'])
212
213		return load_ts_df, curves_individual
214
215
216	View Code Duplication	def insert_osm_ind_load():
		0 ignored issues – show Duplication introduced 2021-07-28 12:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
217		"""Inserts electrical industry loads assigned to osm landuse areas to the database
218
219		Returns
220		-------
221		None.
222
223		"""
224
225		targets = egon.data.config.datasets()["electrical_load_curves_industry"][
226		"targets"
227		]
228
229		for scenario in ["eGon2035", "eGon100RE"]:
230
231		# Delete existing data from database
232		db.execute_sql(
233		f"""
234		DELETE FROM
235		{targets['osm_load']['schema']}.{targets['osm_load']['table']}
236		WHERE scn_name = '{scenario}'
237		"""
238		)
239
240		db.execute_sql(
241		f"""
242		DELETE FROM
243		{targets['osm_load_individual']['schema']}.{targets['osm_load_individual']['table']}
244		WHERE scn_name = '{scenario}'
245		"""
246		)
247
248		# Calculate cts load curves per mv substation (hvmv bus)
249		data, curves_individual = calc_load_curves_ind_osm(scenario)
250		data.index = data.index.rename("bus")
251		data["scn_name"] = scenario
252
253		data.set_index(["scn_name"], inplace=True, append=True)
254
255		# Insert into database
256		data.to_sql(
257		targets["osm_load"]["table"],
258		schema=targets["osm_load"]["schema"],
259		con=db.engine(),
260		if_exists="append",
261		)
262
263		curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
264		curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
265		curves_individual = identify_voltage_level(curves_individual)
266
267		curves_individual.to_sql(
268		targets["osm_load_individual"]["table"],
269		schema=targets["osm_load_individual"]["schema"],
270		con=db.engine(),
271		if_exists="append",
272		)
273
274
275
276
277		def calc_load_curves_ind_sites(scenario):
278		"""Temporal disaggregation of load curves per industrial site and industrial subsector.
279
280
281		Parameters
282		----------
283		scenario : str
284		Scenario name.
285
286		Returns
287		-------
288		pandas.DataFrame
289		Demand timeseries of industry allocated to industrial sites and aggregated
290		per substation id and industrial subsector
291
292		"""
293		sources = egon.data.config.datasets()["electrical_load_curves_industry"][
294		"sources"
295		]
296
297		# Select demands per industrial site including the subsector information
298		demands_ind_sites = db.select_dataframe(
299		f"""SELECT industrial_sites_id, wz, demand
300		FROM {sources['sites']['schema']}.
301		{sources['sites']['table']}
302		WHERE scenario = '{scenario}'
303		AND demand > 0
304		"""
305		).set_index(["industrial_sites_id"])
306
307		# Select industrial sites as demand_areas from database
308
309		demand_area = db.select_geodataframe(
310		f"""SELECT id, geom FROM
311		{sources['sites_geom']['schema']}.
312		{sources['sites_geom']['table']}""",
313		index_col="id",
314		geom_col="geom",
315		epsg=3035,
316		)
317
318		# Replace entries to bring it in line with demandregio's subsector definitions
319		demands_ind_sites.replace(1718, 17, inplace=True)
320		share_wz_sites = demands_ind_sites.copy()
321
322		# Create additional df on wz_share per industrial site, which is always set to one
323		# as the industrial demand per site is subsector specific
324
325		share_wz_sites.demand = 1
326		share_wz_sites.reset_index(inplace=True)
327
328		share_transpose = pd.DataFrame(
329		index=share_wz_sites.industrial_sites_id.unique(),
330		columns=share_wz_sites.wz.unique(),
331		)
332		share_transpose.index.rename("industrial_sites_id", inplace=True)
333		for wz in share_transpose.columns:
334		share_transpose[wz] = (
335		share_wz_sites[share_wz_sites.wz == wz]
336		.set_index("industrial_sites_id")
337		.demand
338		)
339
340		load_curves = calc_load_curve(share_transpose, demands_ind_sites["demand"])
341
342		curves_da = identify_bus(load_curves, demand_area)
343
344		curves_da = pd.merge(
345		curves_da, demands_ind_sites.wz, left_on="id", right_index=True
346		)
347
348		# Group all load curves per bus and wz
349		curves_bus = (
350		curves_da.fillna(0)
351		.groupby(["bus_id", "wz"])
352		.sum()
353		.drop(["id"], axis=1)
354		)
355
356		# Initalize pandas.DataFrame for pf table load timeseries
357		load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])
358
359		# Insert data for pf load timeseries table
360		load_ts_df.p_set = curves_bus.values.tolist()
361
362		# Create Dataframe to store time series individually
363		curves_individual_interim = (
364		curves_da.drop(["bus_id", "geom", "wz"], axis=1).fillna(0)
365		).set_index("id")
366		curves_individual = curves_da[["id", "bus_id"]]
367		curves_individual["p_set"] = curves_individual_interim.values.tolist()
368		curves_individual["scn_name"]= scenario
369		curves_individual = curves_individual.rename(columns={"id": "site_id"}).set_index(['site_id', 'scn_name'])
370
371		return load_ts_df, curves_individual
372
373
374	View Code Duplication	def insert_sites_ind_load():
		0 ignored issues – show Duplication introduced 2021-07-28 12:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
375		"""Inserts electrical industry loads assigned to osm landuse areas to the database
376
377		Returns
378		-------
379		None.
380
381		"""
382
383		targets = egon.data.config.datasets()["electrical_load_curves_industry"][
384		"targets"
385		]
386
387		for scenario in ["eGon2035", "eGon100RE"]:
388
389		# Delete existing data from database
390		db.execute_sql(
391		f"""
392		DELETE FROM
393		{targets['sites_load']['schema']}.{targets['sites_load']['table']}
394		WHERE scn_name = '{scenario}'
395		"""
396		)
397
398		# Delete existing data from database
399		db.execute_sql(
400		f"""
401		DELETE FROM
402		{targets['sites_load_individual']['schema']}.
403		{targets['sites_load_individual']['table']}
404		WHERE scn_name = '{scenario}'
405		"""
406		)
407
408		# Calculate industrial load curves per bus
409		data, curves_individual = calc_load_curves_ind_sites(scenario)
410		data.index = data.index.rename(["bus", "wz"])
411		data["scn_name"] = scenario
412
413		data.set_index(["scn_name"], inplace=True, append=True)
414
415		# Insert into database
416		data.to_sql(
417		targets["sites_load"]["table"],
418		schema=targets["sites_load"]["schema"],
419		con=db.engine(),
420		if_exists="append",
421		)
422
423		curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
424		curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
425		curves_individual = identify_voltage_level(curves_individual)
426
427		curves_individual.to_sql(
428		targets["sites_load_individual"]["table"],
429		schema=targets["sites_load_individual"]["schema"],
430		con=db.engine(),
431		if_exists="append",
432		)
433

openego / eGon-data

Pull Request — dev (#1020)

calc_load_curves_ind_sites() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like