Passed
Pull Request — dev (#1052), created by unknown
02:51 (queued 01:09)

calc_load_curves_ind_sites()   Rating: B

Complexity
  Conditions: 2

Size
  Total Lines: 102
  Code Lines: 47

Duplication
  Lines: 0
  Ratio: 0 %

Importance
  Changes: 0
Metric   Value
eloc     47
dl       0
loc      102
rs       8.7345
c        0
b        0
f        0
cc       2
nop      1

How to fix: Long Method

Small methods make your code easier to understand, particularly when combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method; the comment is then a good starting point for naming it.

Commonly applied refactorings include Extract Method, as sketched below.
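As an illustrative sketch only (the helper name calc_share_transpose is hypothetical and not part of the reviewed code), the per-subsector share table built inside calc_load_curves_ind_sites() could be extracted into a small, well-named method:

import pandas as pd


def calc_share_transpose(share_wz_sites):
    """Pivot per-site subsector shares into one column per wz code.

    Expects a DataFrame with columns industrial_sites_id, wz and demand.
    """
    share_transpose = pd.DataFrame(
        index=share_wz_sites.industrial_sites_id.unique(),
        columns=share_wz_sites.wz.unique(),
    )
    share_transpose.index.rename("industrial_sites_id", inplace=True)

    # One column per subsector (wz), holding each site's share in that wz
    for wz in share_transpose.columns:
        share_transpose[wz] = (
            share_wz_sites[share_wz_sites.wz == wz]
            .set_index("industrial_sites_id")
            .demand
        )

    return share_transpose

Inside calc_load_curves_ind_sites() the corresponding block would then collapse to a single call, e.g. share_transpose = calc_share_transpose(share_wz_sites).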

"""The central module containing all code dealing with processing
timeseries data using demandregio

"""

from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import db
from egon.data.datasets.electricity_demand.temporal import calc_load_curve
import egon.data.config

Base = declarative_base()


def identify_voltage_level(df):

    """Identify the voltage_level of a grid component based on its peak load
    and defined thresholds.


    Parameters
    ----------
    df : pandas.DataFrame
        Data frame containing information about peak loads


    Returns
    -------
    pandas.DataFrame
        Data frame with an additional column with voltage level

    """

    df["voltage_level"] = np.nan

    # Identify voltage_level for every demand area taking thresholds into
    # account which were defined in the eGon project
    df.loc[df["peak_load"] <= 0.1, "voltage_level"] = 7
    df.loc[df["peak_load"] > 0.1, "voltage_level"] = 6
    df.loc[df["peak_load"] > 0.2, "voltage_level"] = 5
    df.loc[df["peak_load"] > 5.5, "voltage_level"] = 4
    df.loc[df["peak_load"] > 20, "voltage_level"] = 3
    df.loc[df["peak_load"] > 120, "voltage_level"] = 1

    return df


def identify_bus(load_curves, demand_area):
    """Identify the grid connection point for a consumer by determining its
    grid level based on the time series' peak load and the spatial
    intersection with mv grid districts or ehv voronoi cells.


    Parameters
    ----------
    load_curves : pandas.DataFrame
        Demand timeseries per demand area (e.g. osm landuse area, industrial
        site)

    demand_area : pandas.DataFrame
        DataFrame with id and geometry of areas to which an industrial demand
        is assigned, such as osm landuse areas or industrial sites.

    Returns
    -------
    pandas.DataFrame
        Aggregated industrial demand timeseries per bus

    """

    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select mv griddistrict
    griddistrict = db.select_geodataframe(
        f"""SELECT bus_id, geom FROM
                {sources['egon_mv_grid_district']['schema']}.
                {sources['egon_mv_grid_district']['table']}""",
        geom_col="geom",
        epsg=3035,
    )

    # Initialize dataframe to identify peak load per demand area (e.g. osm
    # landuse area or industrial site)
    peak = pd.DataFrame(columns=["id", "peak_load"])
    peak["id"] = load_curves.max(axis=0).index
    peak["peak_load"] = load_curves.max(axis=0).values

    peak = identify_voltage_level(peak)

    # Assign bus_id to demand area by merging landuse and peak df
    peak = pd.merge(demand_area, peak, right_on="id", left_index=True)

    # Identify all demand areas connected to HVMV buses
    peak_hv = peak[peak["voltage_level"] > 1]

    # Perform a spatial join between the centroid of the demand area and mv
    # grid districts to identify grid connection point
    peak_hv["centroid"] = peak_hv["geom"].centroid
    peak_hv = peak_hv.set_geometry("centroid")
    peak_hv_c = gpd.sjoin(peak_hv, griddistrict, how="inner", op="intersects")

    # Perform a spatial join between the polygon of the demand area and mv
    # grid districts to ensure every area gets assigned to a bus
    peak_hv_p = peak_hv[~peak_hv.isin(peak_hv_c)].dropna().set_geometry("geom")
    peak_hv_p = gpd.sjoin(
        peak_hv_p, griddistrict, how="inner", op="intersects"
    ).drop_duplicates(subset=["id"])

    # Bring both dataframes together
    peak_bus = peak_hv_c.append(peak_hv_p, ignore_index=True)

    # Select ehv voronoi
    ehv_voronoi = db.select_geodataframe(
        f"""SELECT bus_id, geom FROM
                {sources['egon_mv_grid_district']['schema']}.
                {sources['egon_mv_grid_district']['table']}""",
        geom_col="geom",
        epsg=3035,
    )

    # Identify all demand areas connected to EHV buses
    peak_ehv = peak[peak["voltage_level"] == 1]

    # Perform a spatial join between the centroid of the demand area and ehv
    # voronoi to identify grid connection point
    peak_ehv["centroid"] = peak_ehv["geom"].centroid
    peak_ehv = peak_ehv.set_geometry("centroid")
    peak_ehv = gpd.sjoin(peak_ehv, ehv_voronoi, how="inner", op="intersects")

    # Bring both dataframes together
    peak_bus = peak_bus.append(peak_ehv, ignore_index=True)

    # Combine dataframes to bring load curves and bus id together
    curves_da = pd.merge(
        load_curves.T,
        peak_bus[["bus_id", "id", "geom"]],
        left_index=True,
        right_on="id",
    )

    return curves_da


def calc_load_curves_ind_osm(scenario):
    """Temporally disaggregate electrical demand per osm industrial landuse
    area.


    Parameters
    ----------
    scenario : str
        Scenario name.

    Returns
    -------
    pandas.DataFrame
        Demand timeseries of industry allocated to osm landuse areas and
        aggregated per substation id

    """

    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select demands per industrial branch and osm landuse area
    demands_osm_area = db.select_dataframe(
        f"""SELECT osm_id, wz, demand
            FROM {sources['osm']['schema']}.
            {sources['osm']['table']}
            WHERE scenario = '{scenario}'
            AND demand > 0
            """
    ).set_index(["osm_id", "wz"])

    # Select industrial landuse polygons as demand area
    demand_area = db.select_geodataframe(
        f"""SELECT id, geom FROM
                {sources['osm_landuse']['schema']}.
                {sources['osm_landuse']['table']}
                WHERE sector = 3 """,
        index_col="id",
        geom_col="geom",
        epsg=3035,
    )

    # Calculate shares of industrial branches per osm area
    osm_share_wz = demands_osm_area.groupby("osm_id").apply(
        lambda grp: grp / grp.sum()
    )

    osm_share_wz.reset_index(inplace=True)

    share_wz_transpose = pd.DataFrame(
        index=osm_share_wz.osm_id.unique(), columns=osm_share_wz.wz.unique()
    )
    share_wz_transpose.index.rename("osm_id", inplace=True)

    for wz in share_wz_transpose.columns:
        share_wz_transpose[wz] = (
            osm_share_wz[osm_share_wz.wz == wz].set_index("osm_id").demand
        )

    # Rename columns to bring it in line with demandregio data
    share_wz_transpose.rename(columns={1718: 17}, inplace=True)

    # Calculate industrial annual demand per osm area
    annual_demand_osm = demands_osm_area.groupby("osm_id").demand.sum()

    # Return electrical load curves per osm industrial landuse area
    load_curves = calc_load_curve(share_wz_transpose, annual_demand_osm)

    curves_da = identify_bus(load_curves, demand_area)

    # Group all load curves per bus
    curves_bus = (
        curves_da.drop(["id"], axis=1).fillna(0).groupby("bus_id").sum()
    )

    # Initialize pandas.DataFrame for export to database
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])

    # Insert time series data to df as an array
    load_ts_df.p_set = curves_bus.values.tolist()

    # Create DataFrame to store time series individually
    curves_individual_interim = (
        curves_da.drop(["bus_id", "geom"], axis=1).fillna(0)
    ).set_index("id")
    curves_individual = curves_da[["id", "bus_id"]]
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
    curves_individual["scn_name"] = scenario
    curves_individual = curves_individual.rename(
        columns={"id": "osm_id"}
    ).set_index(["osm_id", "scn_name"])

    return load_ts_df, curves_individual


View Code Duplication: this code seems to be duplicated in your project.
def insert_osm_ind_load():
    """Inserts electrical industry loads assigned to osm landuse areas to the
    database.

    Returns
    -------
    None.

    """

    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2021", "eGon2035", "eGon100RE"]:

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['osm_load']['schema']}.{targets['osm_load']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        db.execute_sql(
            f"""
            DELETE FROM
            {targets['osm_load_individual']['schema']}.
            {targets['osm_load_individual']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Calculate industrial load curves per mv substation (hvmv bus)
        data, curves_individual = calc_load_curves_ind_osm(scenario)
        data.index = data.index.rename("bus")
        data["scn_name"] = scenario

        data.set_index(["scn_name"], inplace=True, append=True)

        # Insert into database
        data.to_sql(
            targets["osm_load"]["table"],
            schema=targets["osm_load"]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        curves_individual["peak_load"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).max(axis=1)
        curves_individual["demand"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)

        curves_individual.to_sql(
            targets["osm_load_individual"]["table"],
            schema=targets["osm_load_individual"]["schema"],
            con=db.engine(),
            if_exists="append",
        )


def calc_load_curves_ind_sites(scenario):
    """Temporal disaggregation of load curves per industrial site and
    industrial subsector.


    Parameters
    ----------
    scenario : str
        Scenario name.

    Returns
    -------
    pandas.DataFrame
        Demand timeseries of industry allocated to industrial sites and
        aggregated per substation id and industrial subsector

    """
    sources = egon.data.config.datasets()["electrical_load_curves_industry"][
        "sources"
    ]

    # Select demands per industrial site including the subsector information
    demands_ind_sites = db.select_dataframe(
        f"""SELECT industrial_sites_id, wz, demand
            FROM {sources['sites']['schema']}.
            {sources['sites']['table']}
            WHERE scenario = '{scenario}'
            AND demand > 0
            """
    ).set_index(["industrial_sites_id"])

    # Select industrial sites as demand_areas from database

    demand_area = db.select_geodataframe(
        f"""SELECT id, geom FROM
                {sources['sites_geom']['schema']}.
                {sources['sites_geom']['table']}""",
        index_col="id",
        geom_col="geom",
        epsg=3035,
    )

    # Replace entries to bring it in line with demandregio's subsector
    # definitions
    demands_ind_sites.replace(1718, 17, inplace=True)
    share_wz_sites = demands_ind_sites.copy()

    # Create additional df on wz_share per industrial site, which is always
    # set to one as the industrial demand per site is subsector specific

    share_wz_sites.demand = 1
    share_wz_sites.reset_index(inplace=True)

    share_transpose = pd.DataFrame(
        index=share_wz_sites.industrial_sites_id.unique(),
        columns=share_wz_sites.wz.unique(),
    )
    share_transpose.index.rename("industrial_sites_id", inplace=True)
    for wz in share_transpose.columns:
        share_transpose[wz] = (
            share_wz_sites[share_wz_sites.wz == wz]
            .set_index("industrial_sites_id")
            .demand
        )

    load_curves = calc_load_curve(share_transpose, demands_ind_sites["demand"])

    curves_da = identify_bus(load_curves, demand_area)

    curves_da = pd.merge(
        curves_da, demands_ind_sites.wz, left_on="id", right_index=True
    )

    # Group all load curves per bus and wz
    curves_bus = (
        curves_da.fillna(0)
        .groupby(["bus_id", "wz"])
        .sum()
        .drop(["id"], axis=1)
    )

    # Initialize pandas.DataFrame for pf table load timeseries
    load_ts_df = pd.DataFrame(index=curves_bus.index, columns=["p_set"])

    # Insert data for pf load timeseries table
    load_ts_df.p_set = curves_bus.values.tolist()

    # Create DataFrame to store time series individually
    curves_individual_interim = (
        curves_da.drop(["bus_id", "geom", "wz"], axis=1).fillna(0)
    ).set_index("id")
    curves_individual = curves_da[["id", "bus_id"]]
    curves_individual["p_set"] = curves_individual_interim.values.tolist()
    curves_individual["scn_name"] = scenario
    curves_individual = curves_individual.merge(
        curves_da[["wz", "id"]], left_on="id", right_on="id"
    )
    curves_individual = curves_individual.rename(
        columns={"id": "site_id"}
    ).set_index(["site_id", "scn_name"])

    return load_ts_df, curves_individual


View Code Duplication: this code seems to be duplicated in your project.
def insert_sites_ind_load():
    """Inserts electrical industry loads assigned to industrial sites to the
    database.

    Returns
    -------
    None.

    """

    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2021", "eGon2035", "eGon100RE"]:

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['sites_load']['schema']}.{targets['sites_load']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Delete existing data from database
        db.execute_sql(
            f"""
            DELETE FROM
            {targets['sites_load_individual']['schema']}.
            {targets['sites_load_individual']['table']}
            WHERE scn_name = '{scenario}'
            """
        )

        # Calculate industrial load curves per bus
        data, curves_individual = calc_load_curves_ind_sites(scenario)
        data.index = data.index.rename(["bus", "wz"])
        data["scn_name"] = scenario

        data.set_index(["scn_name"], inplace=True, append=True)

        # Insert into database
        data.to_sql(
            targets["sites_load"]["table"],
            schema=targets["sites_load"]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        curves_individual["peak_load"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).max(axis=1)
        curves_individual["demand"] = np.array(
            curves_individual["p_set"].values.tolist()
        ).sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)

        curves_individual.to_sql(
            targets["sites_load_individual"]["table"],
            schema=targets["sites_load_individual"]["schema"],
            con=db.engine(),
            if_exists="append",
        )
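One way to address the duplication flagged between insert_osm_ind_load() and insert_sites_ind_load() would be a shared, parameterised routine. The sketch below is illustrative only; the helper name _insert_ind_load and its parameters are hypothetical and not part of the reviewed code, and it assumes the module-level imports (np, db, egon.data.config) and identify_voltage_level() shown above.

def _insert_ind_load(calc_func, load_key, individual_key, index_names):
    """Hypothetical shared helper: delete and re-insert industrial loads."""
    targets = egon.data.config.datasets()["electrical_load_curves_industry"][
        "targets"
    ]

    for scenario in ["eGon2021", "eGon2035", "eGon100RE"]:

        # Delete existing data for this scenario from both target tables
        for key in (load_key, individual_key):
            db.execute_sql(
                f"""
                DELETE FROM {targets[key]['schema']}.{targets[key]['table']}
                WHERE scn_name = '{scenario}'
                """
            )

        # Calculate load curves and insert the aggregated time series
        data, curves_individual = calc_func(scenario)
        data.index = data.index.rename(index_names)
        data["scn_name"] = scenario
        data.set_index(["scn_name"], inplace=True, append=True)
        data.to_sql(
            targets[load_key]["table"],
            schema=targets[load_key]["schema"],
            con=db.engine(),
            if_exists="append",
        )

        # Derive peak load, annual demand and voltage level per area,
        # then insert the individual time series
        p_sets = np.array(curves_individual["p_set"].values.tolist())
        curves_individual["peak_load"] = p_sets.max(axis=1)
        curves_individual["demand"] = p_sets.sum(axis=1)
        curves_individual = identify_voltage_level(curves_individual)
        curves_individual.to_sql(
            targets[individual_key]["table"],
            schema=targets[individual_key]["schema"],
            con=db.engine(),
            if_exists="append",
        )

The two public functions would then reduce to calls such as _insert_ind_load(calc_load_curves_ind_osm, "osm_load", "osm_load_individual", "bus") and _insert_ind_load(calc_load_curves_ind_sites, "sites_load", "sites_load_individual", ["bus", "wz"]).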