Passed
Pull Request — dev (#1020)
by
unknown
02:51 queued 01:17
created

calc_load_curves_ind_sites()   B

Complexity

Conditions 2

Size

Total Lines 95
Code Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 43
dl 0
loc 95
rs 8.8478
c 0
b 0
f 0
cc 2
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""The central module containing all code dealing with processing
2
timeseries data using demandregio
3
4
"""
5
6
import egon.data.config
7
import geopandas as gpd
8
import pandas as pd
9
import numpy as np
10
from egon.data import db
11
from egon.data.datasets.electricity_demand.temporal import calc_load_curve
12
from sqlalchemy import ARRAY, Column, Float, Integer, String
13
from sqlalchemy.ext.declarative import declarative_base
14
15
Base = declarative_base()
16
17
18
def identify_voltage_level(df):
19
20
    """Identify the voltage_level of a grid component based on its peak load and
21
    defined thresholds.
22
23
24
    Parameters
25
    ----------
26
    df : pandas.DataFrame
27
        Data frame containing information about peak loads
28
29
30
    Returns
31
    -------
32
    pandas.DataFrame
33
        Data frame with an additional column with voltage level
34
35
    """
36
37
    df['voltage_level']= np.nan
38
39
    # Identify voltage_level for every demand area taking thresholds into account which were defined in the eGon project
40
    df.loc[df["peak_load"] <= 0.1, "voltage_level"] = 7
41
    df.loc[df["peak_load"] > 0.1, "voltage_level"] = 6
42
    df.loc[df["peak_load"] > 0.2, "voltage_level"] = 5
43
    df.loc[df["peak_load"] > 5.5, "voltage_level"] = 4
44
    df.loc[df["peak_load"] > 20, "voltage_level"] = 3
45
    df.loc[df["peak_load"] > 120, "voltage_level"] = 1
46
47
    return df
48
49
50
def identify_bus(load_curves, demand_area):
51
    """Identify the grid connection point for a consumer by determining its grid level
52
    based on the time series' peak load and the spatial intersection to mv
53
    grid districts or ehv voronoi cells.
54
55
56
    Parameters
57
    ----------
58
    load_curves : pandas.DataFrame
59
        Demand timeseries per demand area (e.g. osm landuse area, industrial site)
60
61
    demand_area: pandas.DataFrame
62
        Dataframe with id and geometry of areas where an industrial demand
63
        is assigned to, such as osm landuse areas or industrial sites.
64
65
    Returns
66
    -------
67
    pandas.DataFrame
68
        Aggregated industrial demand timeseries per bus
69
70
    """
71
72
    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
73
        "sources"
74
    ]
75
76
    # Select mv griddistrict
77
    griddistrict = db.select_geodataframe(
78
        f"""SELECT bus_id, geom FROM
79
                {sources['egon_mv_grid_district']['schema']}.
80
                {sources['egon_mv_grid_district']['table']}""",
81
        geom_col="geom",
82
        epsg=3035,
83
    )
84
85
    # Initialize dataframe to identify peak load per demand area (e.g. osm landuse area or industrial site)
86
    peak = pd.DataFrame(columns=["id", "peak_load"])
87
    peak["id"] = load_curves.max(axis=0).index
88
    peak["peak_load"] = load_curves.max(axis=0).values
89
90
    peak = identify_voltage_level(peak)
91
92
    # Assign bus_id to demand area by merging landuse and peak df
93
    peak = pd.merge(demand_area, peak, right_on="id", left_index=True)
94
95
    # Identify all demand areas connected to HVMV buses
96
    peak_hv = peak[peak["voltage_level"] > 1]
97
98
    # Perform a spatial join between the centroid of the demand area and mv grid districts to identify grid connection point
99
    peak_hv["centroid"] = peak_hv["geom"].centroid
100
    peak_hv = peak_hv.set_geometry("centroid")
101
    peak_hv_c = gpd.sjoin(peak_hv, griddistrict, how="inner", op="intersects")
102
103
    # Perform a spatial join between the polygon of the demand area  and mv grid districts to ensure every area got assign to a bus
104
    peak_hv_p = peak_hv[~peak_hv.isin(peak_hv_c)].dropna().set_geometry("geom")
105
    peak_hv_p = gpd.sjoin(
106
        peak_hv_p, griddistrict, how="inner", op="intersects"
107
    ).drop_duplicates(subset=["id"])
108
109
    # Bring both dataframes together
110
    peak_bus = peak_hv_c.append(peak_hv_p, ignore_index=True)
111
112
    # Combine dataframes to bring loadcurves and bus id together
113
    curves_da = pd.merge(
114
        load_curves.T,
115
        peak_bus[["bus_id", "id", "geom"]],
116
        left_index=True,
117
        right_on="id",
118
    )
119
120
    return curves_da
121
122
123
def calc_load_curves_ind_osm(scenario):
124
    """Temporal disaggregate electrical demand per osm industrial landuse area.
125
126
127
    Parameters
128
    ----------
129
    scenario : str
130
        Scenario name.
131
132
    Returns
133
    -------
134
    pandas.DataFrame
135
        Demand timeseries of industry allocated to osm landuse areas and aggregated
136
        per substation id
137
138
    """
139
140
    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
141
        "sources"
142
    ]
143
144
    # Select demands per industrial branch and osm landuse area
145
    demands_osm_area = db.select_dataframe(
146
        f"""SELECT osm_id, wz, demand
147
            FROM {sources['osm']['schema']}.
148
            {sources['osm']['table']}
149
            WHERE scenario = '{scenario}'
150
            AND demand > 0
151
            """
152
    ).set_index(["osm_id", "wz"])
153
154
    # Select industrial landuse polygons as demand area
155
    demand_area = db.select_geodataframe(
156
        f"""SELECT id, geom FROM
157
                {sources['osm_landuse']['schema']}.
158
                {sources['osm_landuse']['table']}
159
                WHERE sector = 3 """,
160
        index_col="id",
161
        geom_col="geom",
162
        epsg=3035,
163
    )
164
165
    # Calculate shares of industrial branches per osm area
166
    osm_share_wz = demands_osm_area.groupby("osm_id").apply(
167
        lambda grp: grp / grp.sum()
168
    )
169
170
    osm_share_wz.reset_index(inplace=True)
171
172
    share_wz_transpose = pd.DataFrame(
173
        index=osm_share_wz.osm_id.unique(), columns=osm_share_wz.wz.unique()
174
    )
175
    share_wz_transpose.index.rename("osm_id", inplace=True)
176
177
    for wz in share_wz_transpose.columns:
178
        share_wz_transpose[wz] = (
179
            osm_share_wz[osm_share_wz.wz == wz].set_index("osm_id").demand
180
        )
181
182
    # Rename columns to bring it in line with demandregio data
183
    share_wz_transpose.rename(columns={1718: 17}, inplace=True)
184
185
    # Calculate industrial annual demand per osm area
186
    annual_demand_osm = demands_osm_area.groupby("osm_id").demand.sum()
187
188
    # Return electrical load curves per osm industrial landuse area
189
    load_curves = calc_load_curve(share_wz_transpose, annual_demand_osm)
190
191
    curves_da = identify_bus(load_curves, demand_area)
192
193
    # Group all load curves per bus
194
    curves_bus = (
195
        curves_da.drop(["id"], axis=1).fillna(0).groupby("bus_id").sum()
196
    )
197
198
    # Initalize pandas.DataFrame for export to database
199
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])
200
201
    # Insert time series data to df as an array
202
    load_ts_df.p_set = curves_bus.values.tolist()
203
204
    # Create Dataframe to store time series individually
205
    curves_individual_interim = (
206
        curves_da.drop(["bus_id", "geom"], axis=1).fillna(0)
207
    ).set_index("id")
208
    curves_individual = curves_da[["id", "bus_id"]]
209
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
210
    curves_individual["scn_name"]= scenario
211
    curves_individual = curves_individual.rename(columns={"id": "osm_id"}).set_index(['osm_id', 'scn_name'])
212
213
    return load_ts_df, curves_individual
214
215
216 View Code Duplication
def insert_osm_ind_load():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
217
    """Inserts electrical industry loads assigned to osm landuse areas to the database
218
219
    Returns
220
    -------
221
    None.
222
223
    """
224
225
    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
226
        "targets"
227
    ]
228
229
    for scenario in ["eGon2035", "eGon100RE"]:
230
231
        # Delete existing data from database
232
        db.execute_sql(
233
            f"""
234
            DELETE FROM
235
            {targets['osm_load']['schema']}.{targets['osm_load']['table']}
236
            WHERE scn_name = '{scenario}'
237
            """
238
        )
239
240
        db.execute_sql(
241
            f"""
242
            DELETE FROM
243
            {targets['osm_load_individual']['schema']}.{targets['osm_load_individual']['table']}
244
            WHERE scn_name = '{scenario}'
245
            """
246
        )
247
248
        # Calculate cts load curves per mv substation (hvmv bus)
249
        data, curves_individual = calc_load_curves_ind_osm(scenario)
250
        data.index = data.index.rename("bus")
251
        data["scn_name"] = scenario
252
253
        data.set_index(["scn_name"], inplace=True, append=True)
254
255
        # Insert into database
256
        data.to_sql(
257
            targets["osm_load"]["table"],
258
            schema=targets["osm_load"]["schema"],
259
            con=db.engine(),
260
            if_exists="append",
261
        )
262
263
        curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
264
        curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
265
        curves_individual = identify_voltage_level(curves_individual)
266
267
        curves_individual.to_sql(
268
            targets["osm_load_individual"]["table"],
269
            schema=targets["osm_load_individual"]["schema"],
270
            con=db.engine(),
271
            if_exists="append",
272
        )
273
274
275
276
277
def calc_load_curves_ind_sites(scenario):
278
    """Temporal disaggregation of load curves per industrial site and industrial subsector.
279
280
281
    Parameters
282
    ----------
283
    scenario : str
284
        Scenario name.
285
286
    Returns
287
    -------
288
    pandas.DataFrame
289
        Demand timeseries of industry allocated to industrial sites and aggregated
290
        per substation id and industrial subsector
291
292
    """
293
    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
294
        "sources"
295
    ]
296
297
    # Select demands per industrial site including the subsector information
298
    demands_ind_sites = db.select_dataframe(
299
        f"""SELECT industrial_sites_id, wz, demand
300
            FROM {sources['sites']['schema']}.
301
            {sources['sites']['table']}
302
            WHERE scenario = '{scenario}'
303
            AND demand > 0
304
            """
305
    ).set_index(["industrial_sites_id"])
306
307
    # Select industrial sites as demand_areas from database
308
309
    demand_area = db.select_geodataframe(
310
        f"""SELECT id, geom FROM
311
                {sources['sites_geom']['schema']}.
312
                {sources['sites_geom']['table']}""",
313
        index_col="id",
314
        geom_col="geom",
315
        epsg=3035,
316
    )
317
318
    # Replace entries to bring it in line with demandregio's subsector definitions
319
    demands_ind_sites.replace(1718, 17, inplace=True)
320
    share_wz_sites = demands_ind_sites.copy()
321
322
    # Create additional df on wz_share per industrial site, which is always set to one
323
    # as the industrial demand per site is subsector specific
324
325
    share_wz_sites.demand = 1
326
    share_wz_sites.reset_index(inplace=True)
327
328
    share_transpose = pd.DataFrame(
329
        index=share_wz_sites.industrial_sites_id.unique(),
330
        columns=share_wz_sites.wz.unique(),
331
    )
332
    share_transpose.index.rename("industrial_sites_id", inplace=True)
333
    for wz in share_transpose.columns:
334
        share_transpose[wz] = (
335
            share_wz_sites[share_wz_sites.wz == wz]
336
            .set_index("industrial_sites_id")
337
            .demand
338
        )
339
340
    load_curves = calc_load_curve(share_transpose, demands_ind_sites["demand"])
341
342
    curves_da = identify_bus(load_curves, demand_area)
343
344
    curves_da = pd.merge(
345
        curves_da, demands_ind_sites.wz, left_on="id", right_index=True
346
    )
347
348
    # Group all load curves per bus and wz
349
    curves_bus = (
350
        curves_da.fillna(0)
351
        .groupby(["bus_id", "wz"])
352
        .sum()
353
        .drop(["id"], axis=1)
354
    )
355
356
    # Initalize pandas.DataFrame for pf table load timeseries
357
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])
358
359
    # Insert data for pf load timeseries table
360
    load_ts_df.p_set = curves_bus.values.tolist()
361
362
    # Create Dataframe to store time series individually
363
    curves_individual_interim = (
364
        curves_da.drop(["bus_id", "geom", "wz"], axis=1).fillna(0)
365
    ).set_index("id")
366
    curves_individual = curves_da[["id", "bus_id"]]
367
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
368
    curves_individual["scn_name"]= scenario
369
    curves_individual = curves_individual.rename(columns={"id": "site_id"}).set_index(['site_id', 'scn_name'])
370
371
    return load_ts_df, curves_individual
372
373
374 View Code Duplication
def insert_sites_ind_load():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
375
    """Inserts electrical industry loads assigned to osm landuse areas to the database
376
377
    Returns
378
    -------
379
    None.
380
381
    """
382
383
    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
384
        "targets"
385
    ]
386
387
    for scenario in ["eGon2035", "eGon100RE"]:
388
389
        # Delete existing data from database
390
        db.execute_sql(
391
            f"""
392
            DELETE FROM
393
            {targets['sites_load']['schema']}.{targets['sites_load']['table']}
394
            WHERE scn_name = '{scenario}'
395
            """
396
        )
397
398
        # Delete existing data from database
399
        db.execute_sql(
400
            f"""
401
            DELETE FROM
402
            {targets['sites_load_individual']['schema']}.
403
            {targets['sites_load_individual']['table']}
404
            WHERE scn_name = '{scenario}'
405
            """
406
        )
407
408
        # Calculate industrial load curves per bus
409
        data, curves_individual = calc_load_curves_ind_sites(scenario)
410
        data.index = data.index.rename(["bus", "wz"])
411
        data["scn_name"] = scenario
412
413
        data.set_index(["scn_name"], inplace=True, append=True)
414
415
        # Insert into database
416
        data.to_sql(
417
            targets["sites_load"]["table"],
418
            schema=targets["sites_load"]["schema"],
419
            con=db.engine(),
420
            if_exists="append",
421
        )
422
423
        curves_individual['peak_load'] = np.array(curves_individual['p_set'].values.tolist()).max(axis=1)
424
        curves_individual['demand'] = np.array(curves_individual['p_set'].values.tolist()).sum(axis=1)
425
        curves_individual = identify_voltage_level(curves_individual)
426
427
        curves_individual.to_sql(
428
            targets["sites_load_individual"]["table"],
429
            schema=targets["sites_load_individual"]["schema"],
430
            con=db.engine(),
431
            if_exists="append",
432
        )
433